anomaly_detection 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE.txt +1 -1
- data/ext/anomaly_detection/anomaly_detection.hpp +15 -8
- data/ext/anomaly_detection/dist.h +90 -44
- data/ext/anomaly_detection/stl.hpp +103 -50
- data/lib/anomaly_detection/version.rb +1 -1
- data/lib/anomaly_detection.rb +57 -2
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da5eb71023f77a4c05e6322c020ef602e8e22b7b5ba516fce99679af702c881d
|
4
|
+
data.tar.gz: 26560c8dd893c491bd3094202ff82ae33eefdcdba74fe4386b006f7f522906df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec2e1459ca2410ee6ab1bce3fe9c528d6419b75e10c6448f1fe5b3030a2e3d8de320a23a9bded17702a01fd23d112007b909c8611e2da6c1ff4f8521352c89ac
|
7
|
+
data.tar.gz: ad150705d6e32a111c3bc044ef7f99910beebe572e07799719e72118422b7a9e6439943cac8b86d613537d7d0fb52cba86a668faf78d135abc888ce3737f8104
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* AnomalyDetection.cpp v0.1.
|
2
|
+
* AnomalyDetection.cpp v0.1.3
|
3
3
|
* https://github.com/ankane/AnomalyDetection.cpp
|
4
4
|
* GPL-3.0-or-later License
|
5
5
|
*/
|
@@ -39,7 +39,7 @@ float mad(const std::vector<float>& data, float med) {
|
|
39
39
|
return 1.4826 * median_sorted(res);
|
40
40
|
}
|
41
41
|
|
42
|
-
std::vector<size_t> detect_anoms(const std::vector<float>& data,
|
42
|
+
std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
|
43
43
|
auto n = data.size();
|
44
44
|
|
45
45
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
@@ -53,15 +53,22 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
53
53
|
throw std::invalid_argument("series contains NANs");
|
54
54
|
}
|
55
55
|
|
56
|
-
// Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
57
|
-
auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
|
58
|
-
auto seasonal = data_decomp.seasonal;
|
59
|
-
|
60
56
|
std::vector<float> data2;
|
61
57
|
data2.reserve(n);
|
62
58
|
auto med = median(data);
|
63
|
-
|
64
|
-
|
59
|
+
|
60
|
+
if (num_obs_per_period > 1) {
|
61
|
+
// Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
62
|
+
auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
|
63
|
+
auto seasonal = data_decomp.seasonal;
|
64
|
+
|
65
|
+
for (size_t i = 0; i < n; i++) {
|
66
|
+
data2.push_back(data[i] - seasonal[i] - med);
|
67
|
+
}
|
68
|
+
} else {
|
69
|
+
for (size_t i = 0; i < n; i++) {
|
70
|
+
data2.push_back(data[i] - med);
|
71
|
+
}
|
65
72
|
}
|
66
73
|
|
67
74
|
auto num_anoms = 0;
|
@@ -1,12 +1,11 @@
|
|
1
1
|
/*!
|
2
|
-
* dist.h v0.
|
2
|
+
* dist.h v0.3.0
|
3
3
|
* https://github.com/ankane/dist.h
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*/
|
6
6
|
|
7
7
|
#pragma once
|
8
8
|
|
9
|
-
#include <assert.h>
|
10
9
|
#include <math.h>
|
11
10
|
|
12
11
|
#ifdef M_E
|
@@ -21,53 +20,77 @@
|
|
21
20
|
#define DIST_PI 3.14159265358979323846
|
22
21
|
#endif
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
double sign = x < 0 ? -1.0 : 1.0;
|
30
|
-
x = x < 0 ? -x : x;
|
31
|
-
|
32
|
-
double a = 0.14;
|
33
|
-
double x2 = x * x;
|
34
|
-
return sign * sqrt(1.0 - exp(-x2 * (4.0 / DIST_PI + a * x2) / (1.0 + a * x2)));
|
35
|
-
}
|
36
|
-
|
37
|
-
// Winitzki, S. (2008).
|
38
|
-
// A handy approximation for the error function and its inverse.
|
39
|
-
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
40
|
-
// from https://sites.google.com/site/winitzki
|
41
|
-
double inverse_erf(double x) {
|
42
|
-
double sign = x < 0 ? -1.0 : 1.0;
|
43
|
-
x = x < 0 ? -x : x;
|
44
|
-
|
45
|
-
double a = 0.147;
|
46
|
-
double ln = log(1.0 - x * x);
|
47
|
-
double f1 = 2.0 / (DIST_PI * a);
|
48
|
-
double f2 = ln / 2.0;
|
49
|
-
double f3 = f1 + f2;
|
50
|
-
double f4 = 1.0 / a * ln;
|
51
|
-
return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
|
52
|
-
}
|
23
|
+
#ifdef M_SQRT2
|
24
|
+
#define DIST_SQRT2 M_SQRT2
|
25
|
+
#else
|
26
|
+
#define DIST_SQRT2 1.41421356237309504880
|
27
|
+
#endif
|
53
28
|
|
54
29
|
double normal_pdf(double x, double mean, double std_dev) {
|
55
|
-
|
56
|
-
|
30
|
+
if (std_dev <= 0) {
|
31
|
+
return NAN;
|
32
|
+
}
|
33
|
+
|
34
|
+
double n = (x - mean) / std_dev;
|
35
|
+
return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
|
57
36
|
}
|
58
37
|
|
59
38
|
double normal_cdf(double x, double mean, double std_dev) {
|
60
|
-
|
39
|
+
if (std_dev <= 0) {
|
40
|
+
return NAN;
|
41
|
+
}
|
42
|
+
|
43
|
+
return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
|
61
44
|
}
|
62
45
|
|
46
|
+
// Wichura, M. J. (1988).
|
47
|
+
// Algorithm AS 241: The Percentage Points of the Normal Distribution.
|
48
|
+
// Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
|
63
49
|
double normal_ppf(double p, double mean, double std_dev) {
|
64
|
-
|
50
|
+
if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
|
51
|
+
return NAN;
|
52
|
+
}
|
53
|
+
|
54
|
+
if (p == 0) {
|
55
|
+
return -INFINITY;
|
56
|
+
}
|
65
57
|
|
66
|
-
|
58
|
+
if (p == 1) {
|
59
|
+
return INFINITY;
|
60
|
+
}
|
61
|
+
|
62
|
+
double q = p - 0.5;
|
63
|
+
if (fabs(q) < 0.425) {
|
64
|
+
double r = 0.180625 - q * q;
|
65
|
+
return mean + std_dev * q *
|
66
|
+
(((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) * r + 3.3871328727963666080e0) /
|
67
|
+
(((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1);
|
68
|
+
} else {
|
69
|
+
double r = q < 0 ? p : 1 - p;
|
70
|
+
r = sqrt(-log(r));
|
71
|
+
double sign = q < 0 ? -1 : 1;
|
72
|
+
if (r < 5) {
|
73
|
+
r -= 1.6;
|
74
|
+
return mean + std_dev * sign *
|
75
|
+
(((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + 2.41780725177450611770e-1) * r + 1.27045825245236838258e0) * r + 3.64784832476320460504e0) * r + 5.76949722146069140550e0) * r + 4.63033784615654529590e0) * r + 1.42343711074968357734e0) /
|
76
|
+
(((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + 1.51986665636164571966e-2) * r + 1.48103976427480074590e-1) * r + 6.89767334985100004550e-1) * r + 1.67638483018380384940e0) * r + 2.05319162663775882187e0) * r + 1);
|
77
|
+
} else {
|
78
|
+
r -= 5;
|
79
|
+
return mean + std_dev * sign *
|
80
|
+
(((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + 1.24266094738807843860e-3) * r + 2.65321895265761230930e-2) * r + 2.96560571828504891230e-1) * r + 1.78482653991729133580e0) * r + 5.46378491116411436990e0) * r + 6.65790464350110377720e0) /
|
81
|
+
(((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + 1.84631831751005468180e-5) * r + 7.86869131145613259100e-4) * r + 1.48753612908506148525e-2) * r + 1.36929880922735805310e-1) * r + 5.99832206555887937690e-1) * r + 1);
|
82
|
+
}
|
83
|
+
}
|
67
84
|
}
|
68
85
|
|
69
|
-
double students_t_pdf(double x, unsigned int n) {
|
70
|
-
|
86
|
+
double students_t_pdf(double x, double n) {
|
87
|
+
if (n <= 0) {
|
88
|
+
return NAN;
|
89
|
+
}
|
90
|
+
|
91
|
+
if (n == INFINITY) {
|
92
|
+
return normal_pdf(x, 0, 1);
|
93
|
+
}
|
71
94
|
|
72
95
|
return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
|
73
96
|
}
|
@@ -75,8 +98,22 @@ double students_t_pdf(double x, unsigned int n) {
|
|
75
98
|
// Hill, G. W. (1970).
|
76
99
|
// Algorithm 395: Student's t-distribution.
|
77
100
|
// Communications of the ACM, 13(10), 617-619.
|
78
|
-
double students_t_cdf(double x, unsigned int n) {
|
79
|
-
|
101
|
+
double students_t_cdf(double x, double n) {
|
102
|
+
if (n < 1) {
|
103
|
+
return NAN;
|
104
|
+
}
|
105
|
+
|
106
|
+
if (isnan(x)) {
|
107
|
+
return NAN;
|
108
|
+
}
|
109
|
+
|
110
|
+
if (!isfinite(x)) {
|
111
|
+
return x < 0 ? 0 : 1;
|
112
|
+
}
|
113
|
+
|
114
|
+
if (n == INFINITY) {
|
115
|
+
return normal_cdf(x, 0, 1);
|
116
|
+
}
|
80
117
|
|
81
118
|
double start = x < 0 ? 0 : 1;
|
82
119
|
double sign = x < 0 ? 1 : -1;
|
@@ -86,7 +123,7 @@ double students_t_cdf(double x, unsigned int n) {
|
|
86
123
|
double y = t / n;
|
87
124
|
double b = 1.0 + y;
|
88
125
|
|
89
|
-
if ((n >= 20 && t < n) || n > 200) {
|
126
|
+
if (n > floor(n) || (n >= 20 && t < n) || n > 200) {
|
90
127
|
// asymptotic series for large or noninteger n
|
91
128
|
if (y > 10e-6) {
|
92
129
|
y = log(b);
|
@@ -98,6 +135,10 @@ double students_t_cdf(double x, unsigned int n) {
|
|
98
135
|
return start + sign * normal_cdf(-y, 0.0, 1.0);
|
99
136
|
}
|
100
137
|
|
138
|
+
// make n int
|
139
|
+
// n is int between 1 and 200 if made it here
|
140
|
+
n = (int) n;
|
141
|
+
|
101
142
|
if (n < 20 && t < 4.0) {
|
102
143
|
// nested summation of cosine series
|
103
144
|
y = sqrt(y);
|
@@ -144,9 +185,14 @@ double students_t_cdf(double x, unsigned int n) {
|
|
144
185
|
// Hill, G. W. (1970).
|
145
186
|
// Algorithm 396: Student's t-quantiles.
|
146
187
|
// Communications of the ACM, 13(10), 619-620.
|
147
|
-
double students_t_ppf(double p, unsigned int n) {
|
148
|
-
|
149
|
-
|
188
|
+
double students_t_ppf(double p, double n) {
|
189
|
+
if (p < 0 || p > 1 || n < 1) {
|
190
|
+
return NAN;
|
191
|
+
}
|
192
|
+
|
193
|
+
if (n == INFINITY) {
|
194
|
+
return normal_ppf(p, 0, 1);
|
195
|
+
}
|
150
196
|
|
151
197
|
// distribution is symmetric
|
152
198
|
double sign = p < 0.5 ? -1 : 1;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* STL C++ v0.1.
|
2
|
+
* STL C++ v0.1.2
|
3
3
|
* https://github.com/ankane/stl-cpp
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*
|
@@ -13,9 +13,10 @@
|
|
13
13
|
#pragma once
|
14
14
|
|
15
15
|
#include <algorithm>
|
16
|
-
#include <cassert>
|
17
16
|
#include <cmath>
|
17
|
+
#include <numeric>
|
18
18
|
#include <optional>
|
19
|
+
#include <stdexcept>
|
19
20
|
#include <vector>
|
20
21
|
|
21
22
|
namespace stl {
|
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
91
92
|
return;
|
92
93
|
}
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
size_t nleft = 0;
|
96
|
+
size_t nright = 0;
|
96
97
|
|
97
98
|
auto newnj = std::min(njump, n - 1);
|
98
99
|
if (len >= n) {
|
99
100
|
nleft = 1;
|
100
101
|
nright = n;
|
101
|
-
for (
|
102
|
+
for (size_t i = 1; i <= n; i += newnj) {
|
102
103
|
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
103
104
|
if (!ok) {
|
104
105
|
ys[i - 1] = y[i - 1];
|
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
108
109
|
auto nsh = (len + 1) / 2;
|
109
110
|
nleft = 1;
|
110
111
|
nright = len;
|
111
|
-
for (
|
112
|
+
for (size_t i = 1; i <= n; i++) { // fitted value at i
|
112
113
|
if (i > nsh && nright != n) {
|
113
114
|
nleft += 1;
|
114
115
|
nright += 1;
|
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
120
121
|
}
|
121
122
|
} else { // newnj greater than one, len less than n
|
122
123
|
auto nsh = (len + 1) / 2;
|
123
|
-
for (
|
124
|
+
for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
|
124
125
|
if (i < nsh) {
|
125
126
|
nleft = 1;
|
126
127
|
nright = len;
|
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
139
140
|
}
|
140
141
|
|
141
142
|
if (newnj != 1) {
|
142
|
-
for (
|
143
|
+
for (size_t i = 1; i <= n - newnj; i += newnj) {
|
143
144
|
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
|
144
145
|
for (auto j = i + 1; j <= i + newnj - 1; j++) {
|
145
146
|
ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
|
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
167
168
|
auto v = 0.0;
|
168
169
|
|
169
170
|
// get the first average
|
170
|
-
for (
|
171
|
+
for (size_t i = 0; i < len; i++) {
|
171
172
|
v += x[i];
|
172
173
|
}
|
173
174
|
|
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
175
176
|
if (newn > 1) {
|
176
177
|
auto k = len;
|
177
178
|
auto m = 0;
|
178
|
-
for (
|
179
|
+
for (size_t j = 1; j < newn; j++) {
|
179
180
|
// window down the array
|
180
181
|
v = v - x[m] + x[k];
|
181
182
|
ave[j] = v / flen;
|
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
|
|
192
193
|
}
|
193
194
|
|
194
195
|
void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
195
|
-
for (
|
196
|
+
for (size_t i = 0; i < n; i++) {
|
196
197
|
rw[i] = fabs(y[i] - fit[i]);
|
197
198
|
}
|
198
199
|
|
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
206
207
|
auto c9 = 0.999 * cmad;
|
207
208
|
auto c1 = 0.001 * cmad;
|
208
209
|
|
209
|
-
for (
|
210
|
+
for (size_t i = 0; i < n; i++) {
|
210
211
|
auto r = fabs(y[i] - fit[i]);
|
211
212
|
if (r <= c1) {
|
212
213
|
rw[i] = 1.0;
|
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
219
220
|
}
|
220
221
|
|
221
222
|
void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
|
222
|
-
for (
|
223
|
-
|
223
|
+
for (size_t j = 1; j <= np; j++) {
|
224
|
+
size_t k = (n - j) / np + 1;
|
224
225
|
|
225
|
-
for (
|
226
|
+
for (size_t i = 1; i <= k; i++) {
|
226
227
|
work1[i - 1] = y[(i - 1) * np + j - 1];
|
227
228
|
}
|
228
229
|
if (userw) {
|
229
|
-
for (
|
230
|
+
for (size_t i = 1; i <= k; i++) {
|
230
231
|
work3[i - 1] = rw[(i - 1) * np + j - 1];
|
231
232
|
}
|
232
233
|
}
|
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
243
244
|
if (!ok) {
|
244
245
|
work2[k + 1] = work2[k];
|
245
246
|
}
|
246
|
-
for (
|
247
|
+
for (size_t m = 1; m <= k + 2; m++) {
|
247
248
|
season[(m - 1) * np + j - 1] = work2[m - 1];
|
248
249
|
}
|
249
250
|
}
|
250
251
|
}
|
251
252
|
|
252
253
|
void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
|
253
|
-
for (
|
254
|
-
for (
|
254
|
+
for (size_t j = 0; j < ni; j++) {
|
255
|
+
for (size_t i = 0; i < n; i++) {
|
255
256
|
work1[i] = y[i] - trend[i];
|
256
257
|
}
|
257
258
|
|
258
259
|
ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
|
259
260
|
fts(work2, n + 2 * np, np, work3, work1);
|
260
261
|
ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
|
261
|
-
for (
|
262
|
+
for (size_t i = 0; i < n; i++) {
|
262
263
|
season[i] = work2[np + i] - work1[i];
|
263
264
|
}
|
264
|
-
for (
|
265
|
+
for (size_t i = 0; i < n; i++) {
|
265
266
|
work1[i] = y[i] - season[i];
|
266
267
|
}
|
267
268
|
ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
|
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
|
|
269
270
|
}
|
270
271
|
|
271
272
|
void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
|
273
|
+
if (ns < 3) {
|
274
|
+
throw std::invalid_argument("seasonal_length must be at least 3");
|
275
|
+
}
|
276
|
+
if (nt < 3) {
|
277
|
+
throw std::invalid_argument("trend_length must be at least 3");
|
278
|
+
}
|
279
|
+
if (nl < 3) {
|
280
|
+
throw std::invalid_argument("low_pass_length must be at least 3");
|
281
|
+
}
|
282
|
+
if (np < 2) {
|
283
|
+
throw std::invalid_argument("period must be at least 2");
|
284
|
+
}
|
285
|
+
|
286
|
+
if (isdeg != 0 && isdeg != 1) {
|
287
|
+
throw std::invalid_argument("seasonal_degree must be 0 or 1");
|
288
|
+
}
|
289
|
+
if (itdeg != 0 && itdeg != 1) {
|
290
|
+
throw std::invalid_argument("trend_degree must be 0 or 1");
|
291
|
+
}
|
292
|
+
if (ildeg != 0 && ildeg != 1) {
|
293
|
+
throw std::invalid_argument("low_pass_degree must be 0 or 1");
|
294
|
+
}
|
295
|
+
|
296
|
+
if (ns % 2 != 1) {
|
297
|
+
throw std::invalid_argument("seasonal_length must be odd");
|
298
|
+
}
|
299
|
+
if (nt % 2 != 1) {
|
300
|
+
throw std::invalid_argument("trend_length must be odd");
|
301
|
+
}
|
302
|
+
if (nl % 2 != 1) {
|
303
|
+
throw std::invalid_argument("low_pass_length must be odd");
|
304
|
+
}
|
305
|
+
|
272
306
|
auto work1 = std::vector<float>(n + 2 * np);
|
273
307
|
auto work2 = std::vector<float>(n + 2 * np);
|
274
308
|
auto work3 = std::vector<float>(n + 2 * np);
|
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
276
310
|
auto work5 = std::vector<float>(n + 2 * np);
|
277
311
|
|
278
312
|
auto userw = false;
|
279
|
-
|
280
|
-
|
281
|
-
assert(ns >= 3);
|
282
|
-
assert(nt >= 3);
|
283
|
-
assert(nl >= 3);
|
284
|
-
assert(np >= 2);
|
285
|
-
|
286
|
-
assert(isdeg == 0 || isdeg == 1);
|
287
|
-
assert(itdeg == 0 || itdeg == 1);
|
288
|
-
assert(ildeg == 0 || ildeg == 1);
|
289
|
-
|
290
|
-
assert(ns % 2 == 1);
|
291
|
-
assert(nt % 2 == 1);
|
292
|
-
assert(nl % 2 == 1);
|
313
|
+
size_t k = 0;
|
293
314
|
|
294
315
|
while (true) {
|
295
316
|
onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
|
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
297
318
|
if (k > no) {
|
298
319
|
break;
|
299
320
|
}
|
300
|
-
for (
|
321
|
+
for (size_t i = 0; i < n; i++) {
|
301
322
|
work1[i] = trend[i] + season[i];
|
302
323
|
}
|
303
324
|
rwts(y, n, work1.data(), rw);
|
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
305
326
|
}
|
306
327
|
|
307
328
|
if (no <= 0) {
|
308
|
-
for (
|
329
|
+
for (size_t i = 0; i < n; i++) {
|
309
330
|
rw[i] = 1.0;
|
310
331
|
}
|
311
332
|
}
|
312
333
|
}
|
313
334
|
|
335
|
+
float var(const std::vector<float>& series) {
|
336
|
+
auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
|
337
|
+
std::vector<float> tmp;
|
338
|
+
tmp.reserve(series.size());
|
339
|
+
for (auto v : series) {
|
340
|
+
tmp.push_back(pow(v - mean, 2));
|
341
|
+
}
|
342
|
+
return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
|
343
|
+
}
|
344
|
+
|
314
345
|
class StlResult {
|
315
346
|
public:
|
316
347
|
std::vector<float> seasonal;
|
317
348
|
std::vector<float> trend;
|
318
349
|
std::vector<float> remainder;
|
319
350
|
std::vector<float> weights;
|
351
|
+
|
352
|
+
inline float seasonal_strength() {
|
353
|
+
std::vector<float> sr;
|
354
|
+
sr.reserve(remainder.size());
|
355
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
356
|
+
sr.push_back(seasonal[i] + remainder[i]);
|
357
|
+
}
|
358
|
+
return std::max(0.0, 1.0 - var(remainder) / var(sr));
|
359
|
+
}
|
360
|
+
|
361
|
+
inline float trend_strength() {
|
362
|
+
std::vector<float> tr;
|
363
|
+
tr.reserve(remainder.size());
|
364
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
365
|
+
tr.push_back(trend[i] + remainder[i]);
|
366
|
+
}
|
367
|
+
return std::max(0.0, 1.0 - var(remainder) / var(tr));
|
368
|
+
}
|
320
369
|
};
|
321
370
|
|
322
371
|
class StlParams {
|
@@ -337,62 +386,62 @@ public:
|
|
337
386
|
inline StlParams seasonal_length(size_t ns) {
|
338
387
|
this->ns_ = ns;
|
339
388
|
return *this;
|
340
|
-
}
|
389
|
+
}
|
341
390
|
|
342
391
|
inline StlParams trend_length(size_t nt) {
|
343
392
|
this->nt_ = nt;
|
344
393
|
return *this;
|
345
|
-
}
|
394
|
+
}
|
346
395
|
|
347
396
|
inline StlParams low_pass_length(size_t nl) {
|
348
397
|
this->nl_ = nl;
|
349
398
|
return *this;
|
350
|
-
}
|
399
|
+
}
|
351
400
|
|
352
401
|
inline StlParams seasonal_degree(int isdeg) {
|
353
402
|
this->isdeg_ = isdeg;
|
354
403
|
return *this;
|
355
|
-
}
|
404
|
+
}
|
356
405
|
|
357
406
|
inline StlParams trend_degree(int itdeg) {
|
358
407
|
this->itdeg_ = itdeg;
|
359
408
|
return *this;
|
360
|
-
}
|
409
|
+
}
|
361
410
|
|
362
411
|
inline StlParams low_pass_degree(int ildeg) {
|
363
412
|
this->ildeg_ = ildeg;
|
364
413
|
return *this;
|
365
|
-
}
|
414
|
+
}
|
366
415
|
|
367
416
|
inline StlParams seasonal_jump(size_t nsjump) {
|
368
417
|
this->nsjump_ = nsjump;
|
369
418
|
return *this;
|
370
|
-
}
|
419
|
+
}
|
371
420
|
|
372
421
|
inline StlParams trend_jump(size_t ntjump) {
|
373
422
|
this->ntjump_ = ntjump;
|
374
423
|
return *this;
|
375
|
-
}
|
424
|
+
}
|
376
425
|
|
377
426
|
inline StlParams low_pass_jump(size_t nljump) {
|
378
427
|
this->nljump_ = nljump;
|
379
428
|
return *this;
|
380
|
-
}
|
429
|
+
}
|
381
430
|
|
382
431
|
inline StlParams inner_loops(bool ni) {
|
383
432
|
this->ni_ = ni;
|
384
433
|
return *this;
|
385
|
-
}
|
434
|
+
}
|
386
435
|
|
387
436
|
inline StlParams outer_loops(bool no) {
|
388
437
|
this->no_ = no;
|
389
438
|
return *this;
|
390
|
-
}
|
439
|
+
}
|
391
440
|
|
392
441
|
inline StlParams robust(bool robust) {
|
393
442
|
this->robust_ = robust;
|
394
443
|
return *this;
|
395
|
-
}
|
444
|
+
}
|
396
445
|
|
397
446
|
StlResult fit(const float* y, size_t n, size_t np);
|
398
447
|
StlResult fit(const std::vector<float>& y, size_t np);
|
@@ -403,6 +452,10 @@ StlParams params() {
|
|
403
452
|
}
|
404
453
|
|
405
454
|
StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
455
|
+
if (n < 2 * np) {
|
456
|
+
throw std::invalid_argument("series has less than two periods");
|
457
|
+
}
|
458
|
+
|
406
459
|
auto ns = this->ns_.value_or(np);
|
407
460
|
|
408
461
|
auto isdeg = this->isdeg_;
|
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
444
497
|
stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
|
445
498
|
|
446
499
|
res.remainder.reserve(n);
|
447
|
-
for (
|
500
|
+
for (size_t i = 0; i < n; i++) {
|
448
501
|
res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
|
449
502
|
}
|
450
503
|
|
data/lib/anomaly_detection.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# extensions
|
2
|
-
|
2
|
+
require_relative "anomaly_detection/ext"
|
3
3
|
|
4
4
|
# modules
|
5
|
-
|
5
|
+
require_relative "anomaly_detection/version"
|
6
6
|
|
7
7
|
module AnomalyDetection
|
8
8
|
class << self
|
9
9
|
def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
|
10
|
+
if period == :auto
|
11
|
+
period = determine_period(series)
|
12
|
+
puts "Set period to #{period}" if verbose
|
13
|
+
elsif period.nil?
|
14
|
+
period = 1
|
15
|
+
end
|
16
|
+
|
10
17
|
raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
|
11
18
|
|
12
19
|
if series.is_a?(Hash)
|
@@ -16,6 +23,9 @@ module AnomalyDetection
|
|
16
23
|
x = series
|
17
24
|
end
|
18
25
|
|
26
|
+
# flush Ruby output since std::endl flushes C++ output
|
27
|
+
$stdout.flush if verbose
|
28
|
+
|
19
29
|
res = _detect(x, period, max_anoms, alpha, direction, verbose)
|
20
30
|
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
21
31
|
res
|
@@ -63,6 +73,51 @@ module AnomalyDetection
|
|
63
73
|
.config(axis: {title: nil, labelFontSize: 12})
|
64
74
|
end
|
65
75
|
|
76
|
+
# determine period based on time keys (experimental)
|
77
|
+
# in future, could use an approach that looks at values
|
78
|
+
# like https://stats.stackexchange.com/a/1214
|
79
|
+
def determine_period(series)
|
80
|
+
unless series.is_a?(Hash)
|
81
|
+
raise ArgumentError, "series must be a hash for :auto period"
|
82
|
+
end
|
83
|
+
|
84
|
+
times = series.keys.map(&:to_time)
|
85
|
+
|
86
|
+
second = times.all? { |t| t.nsec == 0 }
|
87
|
+
minute = second && times.all? { |t| t.sec == 0 }
|
88
|
+
hour = minute && times.all? { |t| t.min == 0 }
|
89
|
+
day = hour && times.all? { |t| t.hour == 0 }
|
90
|
+
week = day && times.map { |k| k.wday }.uniq.size == 1
|
91
|
+
month = day && times.all? { |k| k.day == 1 }
|
92
|
+
quarter = month && times.all? { |k| k.month % 3 == 1 }
|
93
|
+
year = quarter && times.all? { |k| k.month == 1 }
|
94
|
+
|
95
|
+
period =
|
96
|
+
if year
|
97
|
+
1
|
98
|
+
elsif quarter
|
99
|
+
4
|
100
|
+
elsif month
|
101
|
+
12
|
102
|
+
elsif week
|
103
|
+
52
|
104
|
+
elsif day
|
105
|
+
7
|
106
|
+
elsif hour
|
107
|
+
24 # or 24 * 7
|
108
|
+
elsif minute
|
109
|
+
60 # or 60 * 24
|
110
|
+
elsif second
|
111
|
+
60 # or 60 * 60
|
112
|
+
end
|
113
|
+
|
114
|
+
if series.size < period * 2
|
115
|
+
1
|
116
|
+
else
|
117
|
+
period
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
66
121
|
private
|
67
122
|
|
68
123
|
def iso8601(v)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Copyright (C) 2015 Twitter, Inc and other contributors
|
2
|
+
Copyright (C) 2022 Andrew Kane
|
3
|
+
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
5
|
+
it under the terms of the GNU General Public License as published by
|
6
|
+
the Free Software Foundation, either version 3 of the License, or
|
7
|
+
(at your option) any later version.
|
8
|
+
|
9
|
+
This program is distributed in the hope that it will be useful,
|
10
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
GNU General Public License for more details.
|
13
|
+
|
14
|
+
You should have received a copy of the GNU General Public License
|
15
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -45,6 +45,7 @@ files:
|
|
45
45
|
- licenses/LICENSE-AnomalyDetection-cpp.txt
|
46
46
|
- licenses/LICENSE-MIT-dist-h.txt
|
47
47
|
- licenses/LICENSE-MIT-stl-cpp.txt
|
48
|
+
- licenses/NOTICE-AnomalyDetection-cpp.txt
|
48
49
|
- licenses/UNLICENSE-dist-h.txt
|
49
50
|
- licenses/UNLICENSE-stl-cpp.txt
|
50
51
|
homepage: https://github.com/ankane/AnomalyDetection.rb
|
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
59
60
|
requirements:
|
60
61
|
- - ">="
|
61
62
|
- !ruby/object:Gem::Version
|
62
|
-
version: '2.
|
63
|
+
version: '2.7'
|
63
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
65
|
requirements:
|
65
66
|
- - ">="
|
66
67
|
- !ruby/object:Gem::Version
|
67
68
|
version: '0'
|
68
69
|
requirements: []
|
69
|
-
rubygems_version: 3.
|
70
|
+
rubygems_version: 3.4.1
|
70
71
|
signing_key:
|
71
72
|
specification_version: 4
|
72
73
|
summary: Time series anomaly detection for Ruby
|