anomaly_detection 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE.txt +1 -1
- data/ext/anomaly_detection/anomaly_detection.hpp +15 -8
- data/ext/anomaly_detection/dist.h +90 -44
- data/ext/anomaly_detection/stl.hpp +103 -50
- data/lib/anomaly_detection/version.rb +1 -1
- data/lib/anomaly_detection.rb +57 -2
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da5eb71023f77a4c05e6322c020ef602e8e22b7b5ba516fce99679af702c881d
|
4
|
+
data.tar.gz: 26560c8dd893c491bd3094202ff82ae33eefdcdba74fe4386b006f7f522906df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec2e1459ca2410ee6ab1bce3fe9c528d6419b75e10c6448f1fe5b3030a2e3d8de320a23a9bded17702a01fd23d112007b909c8611e2da6c1ff4f8521352c89ac
|
7
|
+
data.tar.gz: ad150705d6e32a111c3bc044ef7f99910beebe572e07799719e72118422b7a9e6439943cac8b86d613537d7d0fb52cba86a668faf78d135abc888ce3737f8104
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* AnomalyDetection.cpp v0.1.
|
2
|
+
* AnomalyDetection.cpp v0.1.3
|
3
3
|
* https://github.com/ankane/AnomalyDetection.cpp
|
4
4
|
* GPL-3.0-or-later License
|
5
5
|
*/
|
@@ -39,7 +39,7 @@ float mad(const std::vector<float>& data, float med) {
|
|
39
39
|
return 1.4826 * median_sorted(res);
|
40
40
|
}
|
41
41
|
|
42
|
-
std::vector<size_t> detect_anoms(const std::vector<float>& data,
|
42
|
+
std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
|
43
43
|
auto n = data.size();
|
44
44
|
|
45
45
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
@@ -53,15 +53,22 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
|
|
53
53
|
throw std::invalid_argument("series contains NANs");
|
54
54
|
}
|
55
55
|
|
56
|
-
// Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
57
|
-
auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
|
58
|
-
auto seasonal = data_decomp.seasonal;
|
59
|
-
|
60
56
|
std::vector<float> data2;
|
61
57
|
data2.reserve(n);
|
62
58
|
auto med = median(data);
|
63
|
-
|
64
|
-
|
59
|
+
|
60
|
+
if (num_obs_per_period > 1) {
|
61
|
+
// Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
62
|
+
auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
|
63
|
+
auto seasonal = data_decomp.seasonal;
|
64
|
+
|
65
|
+
for (size_t i = 0; i < n; i++) {
|
66
|
+
data2.push_back(data[i] - seasonal[i] - med);
|
67
|
+
}
|
68
|
+
} else {
|
69
|
+
for (size_t i = 0; i < n; i++) {
|
70
|
+
data2.push_back(data[i] - med);
|
71
|
+
}
|
65
72
|
}
|
66
73
|
|
67
74
|
auto num_anoms = 0;
|
@@ -1,12 +1,11 @@
|
|
1
1
|
/*!
|
2
|
-
* dist.h v0.
|
2
|
+
* dist.h v0.3.0
|
3
3
|
* https://github.com/ankane/dist.h
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*/
|
6
6
|
|
7
7
|
#pragma once
|
8
8
|
|
9
|
-
#include <assert.h>
|
10
9
|
#include <math.h>
|
11
10
|
|
12
11
|
#ifdef M_E
|
@@ -21,53 +20,77 @@
|
|
21
20
|
#define DIST_PI 3.14159265358979323846
|
22
21
|
#endif
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
double sign = x < 0 ? -1.0 : 1.0;
|
30
|
-
x = x < 0 ? -x : x;
|
31
|
-
|
32
|
-
double a = 0.14;
|
33
|
-
double x2 = x * x;
|
34
|
-
return sign * sqrt(1.0 - exp(-x2 * (4.0 / DIST_PI + a * x2) / (1.0 + a * x2)));
|
35
|
-
}
|
36
|
-
|
37
|
-
// Winitzki, S. (2008).
|
38
|
-
// A handy approximation for the error function and its inverse.
|
39
|
-
// https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
|
40
|
-
// from https://sites.google.com/site/winitzki
|
41
|
-
double inverse_erf(double x) {
|
42
|
-
double sign = x < 0 ? -1.0 : 1.0;
|
43
|
-
x = x < 0 ? -x : x;
|
44
|
-
|
45
|
-
double a = 0.147;
|
46
|
-
double ln = log(1.0 - x * x);
|
47
|
-
double f1 = 2.0 / (DIST_PI * a);
|
48
|
-
double f2 = ln / 2.0;
|
49
|
-
double f3 = f1 + f2;
|
50
|
-
double f4 = 1.0 / a * ln;
|
51
|
-
return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
|
52
|
-
}
|
23
|
+
#ifdef M_SQRT2
|
24
|
+
#define DIST_SQRT2 M_SQRT2
|
25
|
+
#else
|
26
|
+
#define DIST_SQRT2 1.41421356237309504880
|
27
|
+
#endif
|
53
28
|
|
54
29
|
double normal_pdf(double x, double mean, double std_dev) {
|
55
|
-
|
56
|
-
|
30
|
+
if (std_dev <= 0) {
|
31
|
+
return NAN;
|
32
|
+
}
|
33
|
+
|
34
|
+
double n = (x - mean) / std_dev;
|
35
|
+
return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
|
57
36
|
}
|
58
37
|
|
59
38
|
double normal_cdf(double x, double mean, double std_dev) {
|
60
|
-
|
39
|
+
if (std_dev <= 0) {
|
40
|
+
return NAN;
|
41
|
+
}
|
42
|
+
|
43
|
+
return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
|
61
44
|
}
|
62
45
|
|
46
|
+
// Wichura, M. J. (1988).
|
47
|
+
// Algorithm AS 241: The Percentage Points of the Normal Distribution.
|
48
|
+
// Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
|
63
49
|
double normal_ppf(double p, double mean, double std_dev) {
|
64
|
-
|
50
|
+
if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
|
51
|
+
return NAN;
|
52
|
+
}
|
53
|
+
|
54
|
+
if (p == 0) {
|
55
|
+
return -INFINITY;
|
56
|
+
}
|
65
57
|
|
66
|
-
|
58
|
+
if (p == 1) {
|
59
|
+
return INFINITY;
|
60
|
+
}
|
61
|
+
|
62
|
+
double q = p - 0.5;
|
63
|
+
if (fabs(q) < 0.425) {
|
64
|
+
double r = 0.180625 - q * q;
|
65
|
+
return mean + std_dev * q *
|
66
|
+
(((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) * r + 3.3871328727963666080e0) /
|
67
|
+
(((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1);
|
68
|
+
} else {
|
69
|
+
double r = q < 0 ? p : 1 - p;
|
70
|
+
r = sqrt(-log(r));
|
71
|
+
double sign = q < 0 ? -1 : 1;
|
72
|
+
if (r < 5) {
|
73
|
+
r -= 1.6;
|
74
|
+
return mean + std_dev * sign *
|
75
|
+
(((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + 2.41780725177450611770e-1) * r + 1.27045825245236838258e0) * r + 3.64784832476320460504e0) * r + 5.76949722146069140550e0) * r + 4.63033784615654529590e0) * r + 1.42343711074968357734e0) /
|
76
|
+
(((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + 1.51986665636164571966e-2) * r + 1.48103976427480074590e-1) * r + 6.89767334985100004550e-1) * r + 1.67638483018380384940e0) * r + 2.05319162663775882187e0) * r + 1);
|
77
|
+
} else {
|
78
|
+
r -= 5;
|
79
|
+
return mean + std_dev * sign *
|
80
|
+
(((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + 1.24266094738807843860e-3) * r + 2.65321895265761230930e-2) * r + 2.96560571828504891230e-1) * r + 1.78482653991729133580e0) * r + 5.46378491116411436990e0) * r + 6.65790464350110377720e0) /
|
81
|
+
(((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + 1.84631831751005468180e-5) * r + 7.86869131145613259100e-4) * r + 1.48753612908506148525e-2) * r + 1.36929880922735805310e-1) * r + 5.99832206555887937690e-1) * r + 1);
|
82
|
+
}
|
83
|
+
}
|
67
84
|
}
|
68
85
|
|
69
|
-
double students_t_pdf(double x, unsigned int n) {
|
70
|
-
|
86
|
+
double students_t_pdf(double x, double n) {
|
87
|
+
if (n <= 0) {
|
88
|
+
return NAN;
|
89
|
+
}
|
90
|
+
|
91
|
+
if (n == INFINITY) {
|
92
|
+
return normal_pdf(x, 0, 1);
|
93
|
+
}
|
71
94
|
|
72
95
|
return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
|
73
96
|
}
|
@@ -75,8 +98,22 @@ double students_t_pdf(double x, unsigned int n) {
|
|
75
98
|
// Hill, G. W. (1970).
|
76
99
|
// Algorithm 395: Student's t-distribution.
|
77
100
|
// Communications of the ACM, 13(10), 617-619.
|
78
|
-
double students_t_cdf(double x, unsigned int n) {
|
79
|
-
|
101
|
+
double students_t_cdf(double x, double n) {
|
102
|
+
if (n < 1) {
|
103
|
+
return NAN;
|
104
|
+
}
|
105
|
+
|
106
|
+
if (isnan(x)) {
|
107
|
+
return NAN;
|
108
|
+
}
|
109
|
+
|
110
|
+
if (!isfinite(x)) {
|
111
|
+
return x < 0 ? 0 : 1;
|
112
|
+
}
|
113
|
+
|
114
|
+
if (n == INFINITY) {
|
115
|
+
return normal_cdf(x, 0, 1);
|
116
|
+
}
|
80
117
|
|
81
118
|
double start = x < 0 ? 0 : 1;
|
82
119
|
double sign = x < 0 ? 1 : -1;
|
@@ -86,7 +123,7 @@ double students_t_cdf(double x, unsigned int n) {
|
|
86
123
|
double y = t / n;
|
87
124
|
double b = 1.0 + y;
|
88
125
|
|
89
|
-
if ((n >= 20 && t < n) || n > 200) {
|
126
|
+
if (n > floor(n) || (n >= 20 && t < n) || n > 200) {
|
90
127
|
// asymptotic series for large or noninteger n
|
91
128
|
if (y > 10e-6) {
|
92
129
|
y = log(b);
|
@@ -98,6 +135,10 @@ double students_t_cdf(double x, unsigned int n) {
|
|
98
135
|
return start + sign * normal_cdf(-y, 0.0, 1.0);
|
99
136
|
}
|
100
137
|
|
138
|
+
// make n int
|
139
|
+
// n is int between 1 and 200 if made it here
|
140
|
+
n = (int) n;
|
141
|
+
|
101
142
|
if (n < 20 && t < 4.0) {
|
102
143
|
// nested summation of cosine series
|
103
144
|
y = sqrt(y);
|
@@ -144,9 +185,14 @@ double students_t_cdf(double x, unsigned int n) {
|
|
144
185
|
// Hill, G. W. (1970).
|
145
186
|
// Algorithm 396: Student's t-quantiles.
|
146
187
|
// Communications of the ACM, 13(10), 619-620.
|
147
|
-
double students_t_ppf(double p,
|
148
|
-
|
149
|
-
|
188
|
+
double students_t_ppf(double p, double n) {
|
189
|
+
if (p < 0 || p > 1 || n < 1) {
|
190
|
+
return NAN;
|
191
|
+
}
|
192
|
+
|
193
|
+
if (n == INFINITY) {
|
194
|
+
return normal_ppf(p, 0, 1);
|
195
|
+
}
|
150
196
|
|
151
197
|
// distribution is symmetric
|
152
198
|
double sign = p < 0.5 ? -1 : 1;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* STL C++ v0.1.
|
2
|
+
* STL C++ v0.1.2
|
3
3
|
* https://github.com/ankane/stl-cpp
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*
|
@@ -13,9 +13,10 @@
|
|
13
13
|
#pragma once
|
14
14
|
|
15
15
|
#include <algorithm>
|
16
|
-
#include <cassert>
|
17
16
|
#include <cmath>
|
17
|
+
#include <numeric>
|
18
18
|
#include <optional>
|
19
|
+
#include <stdexcept>
|
19
20
|
#include <vector>
|
20
21
|
|
21
22
|
namespace stl {
|
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
91
92
|
return;
|
92
93
|
}
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
size_t nleft = 0;
|
96
|
+
size_t nright = 0;
|
96
97
|
|
97
98
|
auto newnj = std::min(njump, n - 1);
|
98
99
|
if (len >= n) {
|
99
100
|
nleft = 1;
|
100
101
|
nright = n;
|
101
|
-
for (
|
102
|
+
for (size_t i = 1; i <= n; i += newnj) {
|
102
103
|
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
103
104
|
if (!ok) {
|
104
105
|
ys[i - 1] = y[i - 1];
|
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
108
109
|
auto nsh = (len + 1) / 2;
|
109
110
|
nleft = 1;
|
110
111
|
nright = len;
|
111
|
-
for (
|
112
|
+
for (size_t i = 1; i <= n; i++) { // fitted value at i
|
112
113
|
if (i > nsh && nright != n) {
|
113
114
|
nleft += 1;
|
114
115
|
nright += 1;
|
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
120
121
|
}
|
121
122
|
} else { // newnj greater than one, len less than n
|
122
123
|
auto nsh = (len + 1) / 2;
|
123
|
-
for (
|
124
|
+
for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
|
124
125
|
if (i < nsh) {
|
125
126
|
nleft = 1;
|
126
127
|
nright = len;
|
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
139
140
|
}
|
140
141
|
|
141
142
|
if (newnj != 1) {
|
142
|
-
for (
|
143
|
+
for (size_t i = 1; i <= n - newnj; i += newnj) {
|
143
144
|
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
|
144
145
|
for (auto j = i + 1; j <= i + newnj - 1; j++) {
|
145
146
|
ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
|
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
167
168
|
auto v = 0.0;
|
168
169
|
|
169
170
|
// get the first average
|
170
|
-
for (
|
171
|
+
for (size_t i = 0; i < len; i++) {
|
171
172
|
v += x[i];
|
172
173
|
}
|
173
174
|
|
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
175
176
|
if (newn > 1) {
|
176
177
|
auto k = len;
|
177
178
|
auto m = 0;
|
178
|
-
for (
|
179
|
+
for (size_t j = 1; j < newn; j++) {
|
179
180
|
// window down the array
|
180
181
|
v = v - x[m] + x[k];
|
181
182
|
ave[j] = v / flen;
|
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
|
|
192
193
|
}
|
193
194
|
|
194
195
|
void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
195
|
-
for (
|
196
|
+
for (size_t i = 0; i < n; i++) {
|
196
197
|
rw[i] = fabs(y[i] - fit[i]);
|
197
198
|
}
|
198
199
|
|
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
206
207
|
auto c9 = 0.999 * cmad;
|
207
208
|
auto c1 = 0.001 * cmad;
|
208
209
|
|
209
|
-
for (
|
210
|
+
for (size_t i = 0; i < n; i++) {
|
210
211
|
auto r = fabs(y[i] - fit[i]);
|
211
212
|
if (r <= c1) {
|
212
213
|
rw[i] = 1.0;
|
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
219
220
|
}
|
220
221
|
|
221
222
|
void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
|
222
|
-
for (
|
223
|
-
|
223
|
+
for (size_t j = 1; j <= np; j++) {
|
224
|
+
size_t k = (n - j) / np + 1;
|
224
225
|
|
225
|
-
for (
|
226
|
+
for (size_t i = 1; i <= k; i++) {
|
226
227
|
work1[i - 1] = y[(i - 1) * np + j - 1];
|
227
228
|
}
|
228
229
|
if (userw) {
|
229
|
-
for (
|
230
|
+
for (size_t i = 1; i <= k; i++) {
|
230
231
|
work3[i - 1] = rw[(i - 1) * np + j - 1];
|
231
232
|
}
|
232
233
|
}
|
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
243
244
|
if (!ok) {
|
244
245
|
work2[k + 1] = work2[k];
|
245
246
|
}
|
246
|
-
for (
|
247
|
+
for (size_t m = 1; m <= k + 2; m++) {
|
247
248
|
season[(m - 1) * np + j - 1] = work2[m - 1];
|
248
249
|
}
|
249
250
|
}
|
250
251
|
}
|
251
252
|
|
252
253
|
void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
|
253
|
-
for (
|
254
|
-
for (
|
254
|
+
for (size_t j = 0; j < ni; j++) {
|
255
|
+
for (size_t i = 0; i < n; i++) {
|
255
256
|
work1[i] = y[i] - trend[i];
|
256
257
|
}
|
257
258
|
|
258
259
|
ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
|
259
260
|
fts(work2, n + 2 * np, np, work3, work1);
|
260
261
|
ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
|
261
|
-
for (
|
262
|
+
for (size_t i = 0; i < n; i++) {
|
262
263
|
season[i] = work2[np + i] - work1[i];
|
263
264
|
}
|
264
|
-
for (
|
265
|
+
for (size_t i = 0; i < n; i++) {
|
265
266
|
work1[i] = y[i] - season[i];
|
266
267
|
}
|
267
268
|
ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
|
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
|
|
269
270
|
}
|
270
271
|
|
271
272
|
void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
|
273
|
+
if (ns < 3) {
|
274
|
+
throw std::invalid_argument("seasonal_length must be at least 3");
|
275
|
+
}
|
276
|
+
if (nt < 3) {
|
277
|
+
throw std::invalid_argument("trend_length must be at least 3");
|
278
|
+
}
|
279
|
+
if (nl < 3) {
|
280
|
+
throw std::invalid_argument("low_pass_length must be at least 3");
|
281
|
+
}
|
282
|
+
if (np < 2) {
|
283
|
+
throw std::invalid_argument("period must be at least 2");
|
284
|
+
}
|
285
|
+
|
286
|
+
if (isdeg != 0 && isdeg != 1) {
|
287
|
+
throw std::invalid_argument("seasonal_degree must be 0 or 1");
|
288
|
+
}
|
289
|
+
if (itdeg != 0 && itdeg != 1) {
|
290
|
+
throw std::invalid_argument("trend_degree must be 0 or 1");
|
291
|
+
}
|
292
|
+
if (ildeg != 0 && ildeg != 1) {
|
293
|
+
throw std::invalid_argument("low_pass_degree must be 0 or 1");
|
294
|
+
}
|
295
|
+
|
296
|
+
if (ns % 2 != 1) {
|
297
|
+
throw std::invalid_argument("seasonal_length must be odd");
|
298
|
+
}
|
299
|
+
if (nt % 2 != 1) {
|
300
|
+
throw std::invalid_argument("trend_length must be odd");
|
301
|
+
}
|
302
|
+
if (nl % 2 != 1) {
|
303
|
+
throw std::invalid_argument("low_pass_length must be odd");
|
304
|
+
}
|
305
|
+
|
272
306
|
auto work1 = std::vector<float>(n + 2 * np);
|
273
307
|
auto work2 = std::vector<float>(n + 2 * np);
|
274
308
|
auto work3 = std::vector<float>(n + 2 * np);
|
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
276
310
|
auto work5 = std::vector<float>(n + 2 * np);
|
277
311
|
|
278
312
|
auto userw = false;
|
279
|
-
|
280
|
-
|
281
|
-
assert(ns >= 3);
|
282
|
-
assert(nt >= 3);
|
283
|
-
assert(nl >= 3);
|
284
|
-
assert(np >= 2);
|
285
|
-
|
286
|
-
assert(isdeg == 0 || isdeg == 1);
|
287
|
-
assert(itdeg == 0 || itdeg == 1);
|
288
|
-
assert(ildeg == 0 || ildeg == 1);
|
289
|
-
|
290
|
-
assert(ns % 2 == 1);
|
291
|
-
assert(nt % 2 == 1);
|
292
|
-
assert(nl % 2 == 1);
|
313
|
+
size_t k = 0;
|
293
314
|
|
294
315
|
while (true) {
|
295
316
|
onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
|
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
297
318
|
if (k > no) {
|
298
319
|
break;
|
299
320
|
}
|
300
|
-
for (
|
321
|
+
for (size_t i = 0; i < n; i++) {
|
301
322
|
work1[i] = trend[i] + season[i];
|
302
323
|
}
|
303
324
|
rwts(y, n, work1.data(), rw);
|
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
305
326
|
}
|
306
327
|
|
307
328
|
if (no <= 0) {
|
308
|
-
for (
|
329
|
+
for (size_t i = 0; i < n; i++) {
|
309
330
|
rw[i] = 1.0;
|
310
331
|
}
|
311
332
|
}
|
312
333
|
}
|
313
334
|
|
335
|
+
float var(const std::vector<float>& series) {
|
336
|
+
auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
|
337
|
+
std::vector<float> tmp;
|
338
|
+
tmp.reserve(series.size());
|
339
|
+
for (auto v : series) {
|
340
|
+
tmp.push_back(pow(v - mean, 2));
|
341
|
+
}
|
342
|
+
return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
|
343
|
+
}
|
344
|
+
|
314
345
|
class StlResult {
|
315
346
|
public:
|
316
347
|
std::vector<float> seasonal;
|
317
348
|
std::vector<float> trend;
|
318
349
|
std::vector<float> remainder;
|
319
350
|
std::vector<float> weights;
|
351
|
+
|
352
|
+
inline float seasonal_strength() {
|
353
|
+
std::vector<float> sr;
|
354
|
+
sr.reserve(remainder.size());
|
355
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
356
|
+
sr.push_back(seasonal[i] + remainder[i]);
|
357
|
+
}
|
358
|
+
return std::max(0.0, 1.0 - var(remainder) / var(sr));
|
359
|
+
}
|
360
|
+
|
361
|
+
inline float trend_strength() {
|
362
|
+
std::vector<float> tr;
|
363
|
+
tr.reserve(remainder.size());
|
364
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
365
|
+
tr.push_back(trend[i] + remainder[i]);
|
366
|
+
}
|
367
|
+
return std::max(0.0, 1.0 - var(remainder) / var(tr));
|
368
|
+
}
|
320
369
|
};
|
321
370
|
|
322
371
|
class StlParams {
|
@@ -337,62 +386,62 @@ public:
|
|
337
386
|
inline StlParams seasonal_length(size_t ns) {
|
338
387
|
this->ns_ = ns;
|
339
388
|
return *this;
|
340
|
-
}
|
389
|
+
}
|
341
390
|
|
342
391
|
inline StlParams trend_length(size_t nt) {
|
343
392
|
this->nt_ = nt;
|
344
393
|
return *this;
|
345
|
-
}
|
394
|
+
}
|
346
395
|
|
347
396
|
inline StlParams low_pass_length(size_t nl) {
|
348
397
|
this->nl_ = nl;
|
349
398
|
return *this;
|
350
|
-
}
|
399
|
+
}
|
351
400
|
|
352
401
|
inline StlParams seasonal_degree(int isdeg) {
|
353
402
|
this->isdeg_ = isdeg;
|
354
403
|
return *this;
|
355
|
-
}
|
404
|
+
}
|
356
405
|
|
357
406
|
inline StlParams trend_degree(int itdeg) {
|
358
407
|
this->itdeg_ = itdeg;
|
359
408
|
return *this;
|
360
|
-
}
|
409
|
+
}
|
361
410
|
|
362
411
|
inline StlParams low_pass_degree(int ildeg) {
|
363
412
|
this->ildeg_ = ildeg;
|
364
413
|
return *this;
|
365
|
-
}
|
414
|
+
}
|
366
415
|
|
367
416
|
inline StlParams seasonal_jump(size_t nsjump) {
|
368
417
|
this->nsjump_ = nsjump;
|
369
418
|
return *this;
|
370
|
-
}
|
419
|
+
}
|
371
420
|
|
372
421
|
inline StlParams trend_jump(size_t ntjump) {
|
373
422
|
this->ntjump_ = ntjump;
|
374
423
|
return *this;
|
375
|
-
}
|
424
|
+
}
|
376
425
|
|
377
426
|
inline StlParams low_pass_jump(size_t nljump) {
|
378
427
|
this->nljump_ = nljump;
|
379
428
|
return *this;
|
380
|
-
}
|
429
|
+
}
|
381
430
|
|
382
431
|
inline StlParams inner_loops(bool ni) {
|
383
432
|
this->ni_ = ni;
|
384
433
|
return *this;
|
385
|
-
}
|
434
|
+
}
|
386
435
|
|
387
436
|
inline StlParams outer_loops(bool no) {
|
388
437
|
this->no_ = no;
|
389
438
|
return *this;
|
390
|
-
}
|
439
|
+
}
|
391
440
|
|
392
441
|
inline StlParams robust(bool robust) {
|
393
442
|
this->robust_ = robust;
|
394
443
|
return *this;
|
395
|
-
}
|
444
|
+
}
|
396
445
|
|
397
446
|
StlResult fit(const float* y, size_t n, size_t np);
|
398
447
|
StlResult fit(const std::vector<float>& y, size_t np);
|
@@ -403,6 +452,10 @@ StlParams params() {
|
|
403
452
|
}
|
404
453
|
|
405
454
|
StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
455
|
+
if (n < 2 * np) {
|
456
|
+
throw std::invalid_argument("series has less than two periods");
|
457
|
+
}
|
458
|
+
|
406
459
|
auto ns = this->ns_.value_or(np);
|
407
460
|
|
408
461
|
auto isdeg = this->isdeg_;
|
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
444
497
|
stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
|
445
498
|
|
446
499
|
res.remainder.reserve(n);
|
447
|
-
for (
|
500
|
+
for (size_t i = 0; i < n; i++) {
|
448
501
|
res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
|
449
502
|
}
|
450
503
|
|
data/lib/anomaly_detection.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# extensions
|
2
|
-
|
2
|
+
require_relative "anomaly_detection/ext"
|
3
3
|
|
4
4
|
# modules
|
5
|
-
|
5
|
+
require_relative "anomaly_detection/version"
|
6
6
|
|
7
7
|
module AnomalyDetection
|
8
8
|
class << self
|
9
9
|
def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
|
10
|
+
if period == :auto
|
11
|
+
period = determine_period(series)
|
12
|
+
puts "Set period to #{period}" if verbose
|
13
|
+
elsif period.nil?
|
14
|
+
period = 1
|
15
|
+
end
|
16
|
+
|
10
17
|
raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
|
11
18
|
|
12
19
|
if series.is_a?(Hash)
|
@@ -16,6 +23,9 @@ module AnomalyDetection
|
|
16
23
|
x = series
|
17
24
|
end
|
18
25
|
|
26
|
+
# flush Ruby output since std::endl flushes C++ output
|
27
|
+
$stdout.flush if verbose
|
28
|
+
|
19
29
|
res = _detect(x, period, max_anoms, alpha, direction, verbose)
|
20
30
|
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
21
31
|
res
|
@@ -63,6 +73,51 @@ module AnomalyDetection
|
|
63
73
|
.config(axis: {title: nil, labelFontSize: 12})
|
64
74
|
end
|
65
75
|
|
76
|
+
# determine period based on time keys (experimental)
|
77
|
+
# in future, could use an approach that looks at values
|
78
|
+
# like https://stats.stackexchange.com/a/1214
|
79
|
+
def determine_period(series)
|
80
|
+
unless series.is_a?(Hash)
|
81
|
+
raise ArgumentError, "series must be a hash for :auto period"
|
82
|
+
end
|
83
|
+
|
84
|
+
times = series.keys.map(&:to_time)
|
85
|
+
|
86
|
+
second = times.all? { |t| t.nsec == 0 }
|
87
|
+
minute = second && times.all? { |t| t.sec == 0 }
|
88
|
+
hour = minute && times.all? { |t| t.min == 0 }
|
89
|
+
day = hour && times.all? { |t| t.hour == 0 }
|
90
|
+
week = day && times.map { |k| k.wday }.uniq.size == 1
|
91
|
+
month = day && times.all? { |k| k.day == 1 }
|
92
|
+
quarter = month && times.all? { |k| k.month % 3 == 1 }
|
93
|
+
year = quarter && times.all? { |k| k.month == 1 }
|
94
|
+
|
95
|
+
period =
|
96
|
+
if year
|
97
|
+
1
|
98
|
+
elsif quarter
|
99
|
+
4
|
100
|
+
elsif month
|
101
|
+
12
|
102
|
+
elsif week
|
103
|
+
52
|
104
|
+
elsif day
|
105
|
+
7
|
106
|
+
elsif hour
|
107
|
+
24 # or 24 * 7
|
108
|
+
elsif minute
|
109
|
+
60 # or 60 * 24
|
110
|
+
elsif second
|
111
|
+
60 # or 60 * 60
|
112
|
+
end
|
113
|
+
|
114
|
+
if series.size < period * 2
|
115
|
+
1
|
116
|
+
else
|
117
|
+
period
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
66
121
|
private
|
67
122
|
|
68
123
|
def iso8601(v)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
Copyright (C) 2015 Twitter, Inc and other contributors
|
2
|
+
Copyright (C) 2022 Andrew Kane
|
3
|
+
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
5
|
+
it under the terms of the GNU General Public License as published by
|
6
|
+
the Free Software Foundation, either version 3 of the License, or
|
7
|
+
(at your option) any later version.
|
8
|
+
|
9
|
+
This program is distributed in the hope that it will be useful,
|
10
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
GNU General Public License for more details.
|
13
|
+
|
14
|
+
You should have received a copy of the GNU General Public License
|
15
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -45,6 +45,7 @@ files:
|
|
45
45
|
- licenses/LICENSE-AnomalyDetection-cpp.txt
|
46
46
|
- licenses/LICENSE-MIT-dist-h.txt
|
47
47
|
- licenses/LICENSE-MIT-stl-cpp.txt
|
48
|
+
- licenses/NOTICE-AnomalyDetection-cpp.txt
|
48
49
|
- licenses/UNLICENSE-dist-h.txt
|
49
50
|
- licenses/UNLICENSE-stl-cpp.txt
|
50
51
|
homepage: https://github.com/ankane/AnomalyDetection.rb
|
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
59
60
|
requirements:
|
60
61
|
- - ">="
|
61
62
|
- !ruby/object:Gem::Version
|
62
|
-
version: '2.
|
63
|
+
version: '2.7'
|
63
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
65
|
requirements:
|
65
66
|
- - ">="
|
66
67
|
- !ruby/object:Gem::Version
|
67
68
|
version: '0'
|
68
69
|
requirements: []
|
69
|
-
rubygems_version: 3.
|
70
|
+
rubygems_version: 3.4.1
|
70
71
|
signing_key:
|
71
72
|
specification_version: 4
|
72
73
|
summary: Time series anomaly detection for Ruby
|