anomaly_detection 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +2 -2
- data/ext/anomaly_detection/anomaly_detection.cpp +6 -21
- data/ext/anomaly_detection/dist.h +180 -0
- data/lib/anomaly_detection/version.rb +1 -1
- data/licenses/LICENSE-MIT-dist-h.txt +21 -0
- data/licenses/UNLICENSE-dist-h.txt +24 -0
- metadata +6 -6
- data/ext/anomaly_detection/cdflib.cpp +0 -12126
- data/ext/anomaly_detection/cdflib.hpp +0 -123
- data/licenses/LICENSE-cdflib.txt +0 -165
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfb709e43863f4221a67e8f675f28b5361f6bf33a0d0f6fa4f52cdc0cad01796
|
4
|
+
data.tar.gz: 40965f08bb75cdb673d43e42c7fc47403fbfb1b082de10cab86bac569916d5b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0496d044ecbe143be64164bd88092c5a5bd660f5fec6425e9f6ea0759f2ab7dbaa41a055c9d03a78c8e5a661b86bb25629c5e7a809614ba415d941f4d63fc9cb'
|
7
|
+
data.tar.gz: 8cc3f28c981d0be5cdb3dbfd054910d45469d258a662e163d7b01379af93e6714874d043ff1521ae19506f742b63eb93bf631f6f895c11842c4c15028c66a4b8
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -58,7 +58,7 @@ AnomalyDetection.detect(
|
|
58
58
|
)
|
59
59
|
```
|
60
60
|
|
61
|
-
## Plotting
|
61
|
+
## Plotting
|
62
62
|
|
63
63
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
64
64
|
|
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
|
|
74
74
|
|
75
75
|
## Credits
|
76
76
|
|
77
|
-
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [
|
77
|
+
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist.h](https://github.com/ankane/dist.h) for the quantile function.
|
78
78
|
|
79
79
|
## References
|
80
80
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
#include <vector>
|
7
7
|
|
8
8
|
#include "anomaly_detection.hpp"
|
9
|
-
#include "
|
9
|
+
#include "dist.h"
|
10
10
|
#include "stl.hpp"
|
11
11
|
|
12
12
|
namespace anomaly_detection {
|
@@ -25,21 +25,7 @@ float mad(const std::vector<float>& data, float med) {
|
|
25
25
|
return 1.4826 * median(res);
|
26
26
|
}
|
27
27
|
|
28
|
-
|
29
|
-
int which = 2;
|
30
|
-
double q = 1 - p;
|
31
|
-
double t;
|
32
|
-
int status;
|
33
|
-
double bound;
|
34
|
-
cdft(&which, &p, &q, &t, &df, &status, &bound);
|
35
|
-
|
36
|
-
if (status != 0) {
|
37
|
-
throw std::invalid_argument("Bad status");
|
38
|
-
}
|
39
|
-
return t;
|
40
|
-
}
|
41
|
-
|
42
|
-
std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> interrupt) {
|
28
|
+
std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
|
43
29
|
auto n = data.size();
|
44
30
|
|
45
31
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
@@ -78,8 +64,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
78
64
|
|
79
65
|
// Compute test statistic until r=max_outliers values have been removed from the sample
|
80
66
|
for (auto i = 1; i <= max_outliers; i++) {
|
81
|
-
|
82
|
-
interrupt();
|
67
|
+
check_for_interrupts();
|
83
68
|
|
84
69
|
if (verbose) {
|
85
70
|
std::cout << i << " / " << max_outliers << " completed" << std::endl;
|
@@ -129,7 +114,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
129
114
|
p = 1.0 - alpha / (2.0 * (n - i + 1));
|
130
115
|
}
|
131
116
|
|
132
|
-
auto t =
|
117
|
+
auto t = students_t_ppf(p, n - i - 1);
|
133
118
|
auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
|
134
119
|
|
135
120
|
if (r > lam) {
|
@@ -145,10 +130,10 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
145
130
|
return anomalies;
|
146
131
|
}
|
147
132
|
|
148
|
-
std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()>
|
133
|
+
std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
|
149
134
|
bool one_tail = direction != Direction::Both;
|
150
135
|
bool upper_tail = direction == Direction::Positive;
|
151
|
-
return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose,
|
136
|
+
return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
|
152
137
|
}
|
153
138
|
|
154
139
|
}
|
@@ -0,0 +1,180 @@
|
|
1
|
+
/*!
|
2
|
+
* dist.h v0.1.0
|
3
|
+
* https://github.com/ankane/dist.h
|
4
|
+
* Unlicense OR MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
#pragma once
|
8
|
+
|
9
|
+
#define _USE_MATH_DEFINES
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
#include <math.h>
|
13
|
+
|
14
|
+
// Winitzki, S. (2008).
|
15
|
+
// A handy approximation for the error function and its inverse.
|
16
|
+
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
17
|
+
// from https://sites.google.com/site/winitzki
|
18
|
+
/**
 * Approximates the error function with the closed-form expression from
 * Winitzki (2008), using the constant a = 0.14.
 *
 * The formula is evaluated on |x| and the sign of the input is applied to
 * the result afterwards.
 *
 * @param x Point at which to evaluate the approximation.
 * @return Approximate value of erf(x).
 */
double erf(double x) {
    const double a = 0.14;
    const double direction = x < 0 ? -1.0 : 1.0;
    const double magnitude = x < 0 ? -x : x;
    const double squared = magnitude * magnitude;

    const double exponent = -squared * (4.0 / M_PI + a * squared) / (1.0 + a * squared);
    return direction * sqrt(1.0 - exp(exponent));
}
|
26
|
+
|
27
|
+
// Winitzki, S. (2008).
|
28
|
+
// A handy approximation for the error function and its inverse.
|
29
|
+
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
30
|
+
// from https://sites.google.com/site/winitzki
|
31
|
+
/**
 * Approximates the inverse error function with the closed-form expression
 * from Winitzki (2008), using the constant a = 0.147.
 *
 * The formula is evaluated on |x| and the sign of the input is applied to
 * the result afterwards.
 *
 * @param x Value in [-1, 1].
 * @return Approximate y such that erf(y) == x.
 */
double inverse_erf(double x) {
    double sign = x < 0 ? -1.0 : 1.0;
    x = x < 0 ? -x : x;

    double a = 0.147;
    double ln = log(1.0 - x * x);
    double f1 = 2.0 / (M_PI * a);
    double f2 = ln / 2.0;
    double f3 = f1 + f2;
    double f4 = 1.0 / a * ln;
    return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
}

/**
 * Probability density of the normal distribution.
 *
 * The density is scaled by the standard deviation, not the variance:
 *   f(x) = exp(-0.5 * ((x - mean) / std_dev)^2) / (std_dev * sqrt(2 * pi))
 *
 * @param x       Point at which to evaluate the density.
 * @param mean    Mean of the distribution.
 * @param std_dev Standard deviation of the distribution.
 * @return Density at x.
 */
double normal_pdf(double x, double mean, double std_dev) {
    double z = (x - mean) / std_dev;
    return exp(-0.5 * z * z) / (std_dev * sqrt(2.0 * M_PI));
}

/**
 * Cumulative distribution function of the normal distribution:
 *   F(x) = 0.5 * (1 + erf((x - mean) / (std_dev * sqrt(2))))
 *
 * @param x       Point at which to evaluate the CDF.
 * @param mean    Mean of the distribution.
 * @param std_dev Standard deviation of the distribution.
 * @return P(X <= x).
 */
double normal_cdf(double x, double mean, double std_dev) {
    return 0.5 * (1.0 + erf((x - mean) / (std_dev * sqrt(2.0))));
}

/**
 * Quantile (inverse CDF) of the normal distribution:
 *   Q(p) = mean + std_dev * sqrt(2) * inverse_erf(2p - 1)
 *
 * @param p       Probability in [0, 1] (checked with assert).
 * @param mean    Mean of the distribution.
 * @param std_dev Standard deviation of the distribution.
 * @return x such that normal_cdf(x, mean, std_dev) is approximately p.
 */
double normal_ppf(double p, double mean, double std_dev) {
    assert(p >= 0 && p <= 1);

    return mean + std_dev * sqrt(2.0) * inverse_erf(2.0 * p - 1.0);
}
|
58
|
+
|
59
|
+
/**
 * Probability density of Student's t-distribution:
 *   f(x) = Gamma((n + 1) / 2) / (sqrt(n * pi) * Gamma(n / 2))
 *          * (1 + x^2 / n)^(-(n + 1) / 2)
 *
 * @param x Point at which to evaluate the density.
 * @param n Degrees of freedom, must be >= 1 (checked with assert).
 * @return Density at x.
 */
double students_t_pdf(double x, unsigned int n) {
    assert(n >= 1);

    double kernel = pow(1.0 + x * x / n, -(n + 1.0) / 2.0);

    if (n <= 300) {
        // Direct evaluation; every intermediate stays well inside double range.
        return tgamma((n + 1.0) / 2.0) / (sqrt(n * M_PI) * tgamma(n / 2.0)) * kernel;
    }

    // tgamma() overflows a double once its argument passes roughly 171.6,
    // which would turn the ratio above into 0 or NaN for large n. Evaluate
    // the ratio of gamma functions through their logarithms instead.
    double log_ratio = lgamma((n + 1.0) / 2.0) - lgamma(n / 2.0);
    return exp(log_ratio) / sqrt(n * M_PI) * kernel;
}
|
64
|
+
|
65
|
+
// Hill, G. W. (1970).
|
66
|
+
// Algorithm 395: Student's t-distribution.
|
67
|
+
// Communications of the ACM, 13(10), 617-619.
|
68
|
+
double students_t_cdf(double x, unsigned int n) {
|
69
|
+
assert(n >= 1);
|
70
|
+
|
71
|
+
double start = x < 0 ? 0 : 1;
|
72
|
+
double sign = x < 0 ? 1 : -1;
|
73
|
+
|
74
|
+
double z = 1.0;
|
75
|
+
double t = x * x;
|
76
|
+
double y = t / n;
|
77
|
+
double b = 1.0 + y;
|
78
|
+
|
79
|
+
if ((n >= 20 && t < n) || n > 200) {
|
80
|
+
// asymptotic series for large or noninteger n
|
81
|
+
if (y > 10e-6) {
|
82
|
+
y = log(b);
|
83
|
+
}
|
84
|
+
double a = n - 0.5;
|
85
|
+
b = 48.0 * a * a;
|
86
|
+
y = a * y;
|
87
|
+
y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * sqrt(y);
|
88
|
+
return start + sign * normal_cdf(-y, 0.0, 1.0);
|
89
|
+
}
|
90
|
+
|
91
|
+
if (n < 20 && t < 4.0) {
|
92
|
+
// nested summation of cosine series
|
93
|
+
y = sqrt(y);
|
94
|
+
double a = y;
|
95
|
+
if (n == 1) {
|
96
|
+
a = 0.0;
|
97
|
+
}
|
98
|
+
|
99
|
+
// loop
|
100
|
+
if (n > 1) {
|
101
|
+
n -= 2;
|
102
|
+
while (n > 1) {
|
103
|
+
a = (n - 1) / (b * n) * a + y;
|
104
|
+
n -= 2;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
|
108
|
+
return start + sign * (z - a) / 2;
|
109
|
+
}
|
110
|
+
|
111
|
+
// tail series expanation for large t-values
|
112
|
+
double a = sqrt(b);
|
113
|
+
y = a * n;
|
114
|
+
int j = 0;
|
115
|
+
while (a != z) {
|
116
|
+
j += 2;
|
117
|
+
z = a;
|
118
|
+
y = y * (j - 1) / (b * j);
|
119
|
+
a = a + y / (n + j);
|
120
|
+
}
|
121
|
+
z = 0.0;
|
122
|
+
y = 0.0;
|
123
|
+
a = -a;
|
124
|
+
|
125
|
+
// loop (without n + 2 and n - 2)
|
126
|
+
while (n > 1) {
|
127
|
+
a = (n - 1) / (b * n) * a + y;
|
128
|
+
n -= 2;
|
129
|
+
}
|
130
|
+
a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
|
131
|
+
return start + sign * (z - a) / 2;
|
132
|
+
}
|
133
|
+
|
134
|
+
// Hill, G. W. (1970).
|
135
|
+
// Algorithm 396: Student's t-quantiles.
|
136
|
+
// Communications of the ACM, 13(10), 619-620.
|
137
|
+
double students_t_ppf(double p, unsigned int n) {
|
138
|
+
assert(p >= 0 && p <= 1);
|
139
|
+
assert(n >= 1);
|
140
|
+
|
141
|
+
// distribution is symmetric
|
142
|
+
double sign = p < 0.5 ? -1 : 1;
|
143
|
+
p = p < 0.5 ? 1 - p : p;
|
144
|
+
|
145
|
+
// two-tail to one-tail
|
146
|
+
p = 2.0 * (1.0 - p);
|
147
|
+
|
148
|
+
if (n == 2) {
|
149
|
+
return sign * sqrt(2.0 / (p * (2.0 - p)) - 2.0);
|
150
|
+
}
|
151
|
+
|
152
|
+
double half_pi = M_PI / 2.0;
|
153
|
+
|
154
|
+
if (n == 1) {
|
155
|
+
p = p * half_pi;
|
156
|
+
return sign * cos(p) / sin(p);
|
157
|
+
}
|
158
|
+
|
159
|
+
double a = 1.0 / (n - 0.5);
|
160
|
+
double b = 48.0 / (a * a);
|
161
|
+
double c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
|
162
|
+
double d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * half_pi) * n;
|
163
|
+
double x = d * p;
|
164
|
+
double y = pow(x, 2.0 / n);
|
165
|
+
if (y > 0.05 + a) {
|
166
|
+
// asymptotic inverse expansion about normal
|
167
|
+
x = normal_ppf(p * 0.5, 0.0, 1.0);
|
168
|
+
y = x * x;
|
169
|
+
if (n < 5) {
|
170
|
+
c += 0.3 * (n - 4.5) * (x + 0.6);
|
171
|
+
}
|
172
|
+
c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c;
|
173
|
+
y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x;
|
174
|
+
y = a * y * y;
|
175
|
+
y = y > 0.002 ? exp(y) - 1.0 : 0.5 * y * y + y;
|
176
|
+
} else {
|
177
|
+
y = ((1.0 / (((n + 6.0) / (n * y) - 0.089 * d - 0.822) * (n + 2.0) * 3.0) + 0.5 / (n + 4.0)) * y - 1.0) * (n + 1.0) / (n + 2.0) + 1.0 / y;
|
178
|
+
}
|
179
|
+
return sign * sqrt(n * y);
|
180
|
+
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
2
|
+
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
4
|
+
distribute this software, either in source code form or as a compiled
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
6
|
+
means.
|
7
|
+
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
9
|
+
of this software dedicate any and all copyright interest in the
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
11
|
+
of the public at large and to the detriment of our heirs and
|
12
|
+
successors. We intend this dedication to be an overt act of
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
14
|
+
software under copyright law.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
For more information, please refer to <http://unlicense.org/>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -37,15 +37,15 @@ files:
|
|
37
37
|
- README.md
|
38
38
|
- ext/anomaly_detection/anomaly_detection.cpp
|
39
39
|
- ext/anomaly_detection/anomaly_detection.hpp
|
40
|
-
- ext/anomaly_detection/
|
41
|
-
- ext/anomaly_detection/cdflib.hpp
|
40
|
+
- ext/anomaly_detection/dist.h
|
42
41
|
- ext/anomaly_detection/ext.cpp
|
43
42
|
- ext/anomaly_detection/extconf.rb
|
44
43
|
- ext/anomaly_detection/stl.hpp
|
45
44
|
- lib/anomaly_detection.rb
|
46
45
|
- lib/anomaly_detection/version.rb
|
46
|
+
- licenses/LICENSE-MIT-dist-h.txt
|
47
47
|
- licenses/LICENSE-MIT-stl-cpp.txt
|
48
|
-
- licenses/
|
48
|
+
- licenses/UNLICENSE-dist-h.txt
|
49
49
|
- licenses/UNLICENSE-stl-cpp.txt
|
50
50
|
homepage: https://github.com/ankane/AnomalyDetection.rb
|
51
51
|
licenses:
|
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0'
|
68
68
|
requirements: []
|
69
|
-
rubygems_version: 3.2.
|
69
|
+
rubygems_version: 3.2.32
|
70
70
|
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: Time series anomaly detection for Ruby
|