anomaly_detection 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c60bb6d75cb8523ecd0926f391d79413a1cb2eb131cd579fd381bb6683f82da3
4
- data.tar.gz: '01594d0f0a97ad8cbb7b0b50cb30894bd0d773d4db45b3158345567ce1732efb'
3
+ metadata.gz: da5eb71023f77a4c05e6322c020ef602e8e22b7b5ba516fce99679af702c881d
4
+ data.tar.gz: 26560c8dd893c491bd3094202ff82ae33eefdcdba74fe4386b006f7f522906df
5
5
  SHA512:
6
- metadata.gz: fe09cc140a5d6543f3b00983a754861f6a3a3a436f8a8afecc80d202f1112bb6ea180df794072ee4711c044508a559a015c885cfd61d2c5be9378fc7b6590d96
7
- data.tar.gz: 5616e6075888b4521355e6c0fb33f7a94361c971c7f58f9ae6a61e5d8529a3e1938deba10be22c24ec5849f1909f48cba07b97fd1e6ea8b47ae4f66626eb703e
6
+ metadata.gz: ec2e1459ca2410ee6ab1bce3fe9c528d6419b75e10c6448f1fe5b3030a2e3d8de320a23a9bded17702a01fd23d112007b909c8611e2da6c1ff4f8521352c89ac
7
+ data.tar.gz: ad150705d6e32a111c3bc044ef7f99910beebe572e07799719e72118422b7a9e6439943cac8b86d613537d7d0fb52cba86a668faf78d135abc888ce3737f8104
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.0 (2023-01-31)
2
+
3
+ - Added experimental support for auto-detecting period
4
+ - Fixed result when no seasonality (period is less than 2)
5
+ - Dropped support for Ruby < 2.7
6
+
1
7
  ## 0.1.4 (2022-03-19)
2
8
 
3
9
  - Fixed initial median calculation
data/NOTICE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (C) 2015 Twitter, Inc and other contributors
2
- Copyright (C) 2021 Andrew Kane
2
+ Copyright (C) 2021-2023 Andrew Kane
3
3
 
4
4
  This program is free software: you can redistribute it and/or modify
5
5
  it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * AnomalyDetection.cpp v0.1.0
2
+ * AnomalyDetection.cpp v0.1.3
3
3
  * https://github.com/ankane/AnomalyDetection.cpp
4
4
  * GPL-3.0-or-later License
5
5
  */
@@ -39,7 +39,7 @@ float mad(const std::vector<float>& data, float med) {
39
39
  return 1.4826 * median_sorted(res);
40
40
  }
41
41
 
42
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
42
+ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
43
43
  auto n = data.size();
44
44
 
45
45
  // Check to make sure we have at least two periods worth of data for anomaly context
@@ -53,15 +53,22 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
53
53
  throw std::invalid_argument("series contains NANs");
54
54
  }
55
55
 
56
- // Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
57
- auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
58
- auto seasonal = data_decomp.seasonal;
59
-
60
56
  std::vector<float> data2;
61
57
  data2.reserve(n);
62
58
  auto med = median(data);
63
- for (auto i = 0; i < n; i++) {
64
- data2.push_back(data[i] - seasonal[i] - med);
59
+
60
+ if (num_obs_per_period > 1) {
61
+ // Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
62
+ auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
63
+ auto seasonal = data_decomp.seasonal;
64
+
65
+ for (size_t i = 0; i < n; i++) {
66
+ data2.push_back(data[i] - seasonal[i] - med);
67
+ }
68
+ } else {
69
+ for (size_t i = 0; i < n; i++) {
70
+ data2.push_back(data[i] - med);
71
+ }
65
72
  }
66
73
 
67
74
  auto num_anoms = 0;
@@ -1,12 +1,11 @@
1
1
  /*!
2
- * dist.h v0.1.1
2
+ * dist.h v0.3.0
3
3
  * https://github.com/ankane/dist.h
4
4
  * Unlicense OR MIT License
5
5
  */
6
6
 
7
7
  #pragma once
8
8
 
9
- #include <assert.h>
10
9
  #include <math.h>
11
10
 
12
11
  #ifdef M_E
@@ -21,53 +20,77 @@
21
20
  #define DIST_PI 3.14159265358979323846
22
21
  #endif
23
22
 
24
- // Winitzki, S. (2008).
25
- // A handy approximation for the error function and its inverse.
26
- // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
27
- // from https://sites.google.com/site/winitzki
28
- double erf(double x) {
29
- double sign = x < 0 ? -1.0 : 1.0;
30
- x = x < 0 ? -x : x;
31
-
32
- double a = 0.14;
33
- double x2 = x * x;
34
- return sign * sqrt(1.0 - exp(-x2 * (4.0 / DIST_PI + a * x2) / (1.0 + a * x2)));
35
- }
36
-
37
- // Winitzki, S. (2008).
38
- // A handy approximation for the error function and its inverse.
39
- // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
40
- // from https://sites.google.com/site/winitzki
41
- double inverse_erf(double x) {
42
- double sign = x < 0 ? -1.0 : 1.0;
43
- x = x < 0 ? -x : x;
44
-
45
- double a = 0.147;
46
- double ln = log(1.0 - x * x);
47
- double f1 = 2.0 / (DIST_PI * a);
48
- double f2 = ln / 2.0;
49
- double f3 = f1 + f2;
50
- double f4 = 1.0 / a * ln;
51
- return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
52
- }
23
+ #ifdef M_SQRT2
24
+ #define DIST_SQRT2 M_SQRT2
25
+ #else
26
+ #define DIST_SQRT2 1.41421356237309504880
27
+ #endif
53
28
 
54
29
  double normal_pdf(double x, double mean, double std_dev) {
55
- double var = std_dev * std_dev;
56
- return (1.0 / (var * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * pow((x - mean) / var, 2));
30
+ if (std_dev <= 0) {
31
+ return NAN;
32
+ }
33
+
34
+ double n = (x - mean) / std_dev;
35
+ return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
57
36
  }
58
37
 
59
38
  double normal_cdf(double x, double mean, double std_dev) {
60
- return 0.5 * (1.0 + erf((x - mean) / (std_dev * std_dev * sqrt(2))));
39
+ if (std_dev <= 0) {
40
+ return NAN;
41
+ }
42
+
43
+ return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
61
44
  }
62
45
 
46
+ // Wichura, M. J. (1988).
47
+ // Algorithm AS 241: The Percentage Points of the Normal Distribution.
48
+ // Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
63
49
  double normal_ppf(double p, double mean, double std_dev) {
64
- assert(p >= 0 && p <= 1);
50
+ if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
51
+ return NAN;
52
+ }
53
+
54
+ if (p == 0) {
55
+ return -INFINITY;
56
+ }
65
57
 
66
- return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
58
+ if (p == 1) {
59
+ return INFINITY;
60
+ }
61
+
62
+ double q = p - 0.5;
63
+ if (fabs(q) < 0.425) {
64
+ double r = 0.180625 - q * q;
65
+ return mean + std_dev * q *
66
+ (((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) * r + 3.3871328727963666080e0) /
67
+ (((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1);
68
+ } else {
69
+ double r = q < 0 ? p : 1 - p;
70
+ r = sqrt(-log(r));
71
+ double sign = q < 0 ? -1 : 1;
72
+ if (r < 5) {
73
+ r -= 1.6;
74
+ return mean + std_dev * sign *
75
+ (((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + 2.41780725177450611770e-1) * r + 1.27045825245236838258e0) * r + 3.64784832476320460504e0) * r + 5.76949722146069140550e0) * r + 4.63033784615654529590e0) * r + 1.42343711074968357734e0) /
76
+ (((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + 1.51986665636164571966e-2) * r + 1.48103976427480074590e-1) * r + 6.89767334985100004550e-1) * r + 1.67638483018380384940e0) * r + 2.05319162663775882187e0) * r + 1);
77
+ } else {
78
+ r -= 5;
79
+ return mean + std_dev * sign *
80
+ (((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + 1.24266094738807843860e-3) * r + 2.65321895265761230930e-2) * r + 2.96560571828504891230e-1) * r + 1.78482653991729133580e0) * r + 5.46378491116411436990e0) * r + 6.65790464350110377720e0) /
81
+ (((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + 1.84631831751005468180e-5) * r + 7.86869131145613259100e-4) * r + 1.48753612908506148525e-2) * r + 1.36929880922735805310e-1) * r + 5.99832206555887937690e-1) * r + 1);
82
+ }
83
+ }
67
84
  }
68
85
 
69
- double students_t_pdf(double x, unsigned int n) {
70
- assert(n >= 1);
86
+ double students_t_pdf(double x, double n) {
87
+ if (n <= 0) {
88
+ return NAN;
89
+ }
90
+
91
+ if (n == INFINITY) {
92
+ return normal_pdf(x, 0, 1);
93
+ }
71
94
 
72
95
  return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
73
96
  }
@@ -75,8 +98,22 @@ double students_t_pdf(double x, unsigned int n) {
75
98
  // Hill, G. W. (1970).
76
99
  // Algorithm 395: Student's t-distribution.
77
100
  // Communications of the ACM, 13(10), 617-619.
78
- double students_t_cdf(double x, unsigned int n) {
79
- assert(n >= 1);
101
+ double students_t_cdf(double x, double n) {
102
+ if (n < 1) {
103
+ return NAN;
104
+ }
105
+
106
+ if (isnan(x)) {
107
+ return NAN;
108
+ }
109
+
110
+ if (!isfinite(x)) {
111
+ return x < 0 ? 0 : 1;
112
+ }
113
+
114
+ if (n == INFINITY) {
115
+ return normal_cdf(x, 0, 1);
116
+ }
80
117
 
81
118
  double start = x < 0 ? 0 : 1;
82
119
  double sign = x < 0 ? 1 : -1;
@@ -86,7 +123,7 @@ double students_t_cdf(double x, unsigned int n) {
86
123
  double y = t / n;
87
124
  double b = 1.0 + y;
88
125
 
89
- if ((n >= 20 && t < n) || n > 200) {
126
+ if (n > floor(n) || (n >= 20 && t < n) || n > 200) {
90
127
  // asymptotic series for large or noninteger n
91
128
  if (y > 10e-6) {
92
129
  y = log(b);
@@ -98,6 +135,10 @@ double students_t_cdf(double x, unsigned int n) {
98
135
  return start + sign * normal_cdf(-y, 0.0, 1.0);
99
136
  }
100
137
 
138
+ // make n int
139
+ // n is an integer between 1 and 200 if execution reaches this point
140
+ n = (int) n;
141
+
101
142
  if (n < 20 && t < 4.0) {
102
143
  // nested summation of cosine series
103
144
  y = sqrt(y);
@@ -144,9 +185,14 @@ double students_t_cdf(double x, unsigned int n) {
144
185
  // Hill, G. W. (1970).
145
186
  // Algorithm 396: Student's t-quantiles.
146
187
  // Communications of the ACM, 13(10), 619-620.
147
- double students_t_ppf(double p, unsigned int n) {
148
- assert(p >= 0 && p <= 1);
149
- assert(n >= 1);
188
+ double students_t_ppf(double p, double n) {
189
+ if (p < 0 || p > 1 || n < 1) {
190
+ return NAN;
191
+ }
192
+
193
+ if (n == INFINITY) {
194
+ return normal_ppf(p, 0, 1);
195
+ }
150
196
 
151
197
  // distribution is symmetric
152
198
  double sign = p < 0.5 ? -1 : 1;
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * STL C++ v0.1.0
2
+ * STL C++ v0.1.2
3
3
  * https://github.com/ankane/stl-cpp
4
4
  * Unlicense OR MIT License
5
5
  *
@@ -13,9 +13,10 @@
13
13
  #pragma once
14
14
 
15
15
  #include <algorithm>
16
- #include <cassert>
17
16
  #include <cmath>
17
+ #include <numeric>
18
18
  #include <optional>
19
+ #include <stdexcept>
19
20
  #include <vector>
20
21
 
21
22
  namespace stl {
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
91
92
  return;
92
93
  }
93
94
 
94
- auto nleft = 0;
95
- auto nright = 0;
95
+ size_t nleft = 0;
96
+ size_t nright = 0;
96
97
 
97
98
  auto newnj = std::min(njump, n - 1);
98
99
  if (len >= n) {
99
100
  nleft = 1;
100
101
  nright = n;
101
- for (auto i = 1; i <= n; i += newnj) {
102
+ for (size_t i = 1; i <= n; i += newnj) {
102
103
  auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
103
104
  if (!ok) {
104
105
  ys[i - 1] = y[i - 1];
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
108
109
  auto nsh = (len + 1) / 2;
109
110
  nleft = 1;
110
111
  nright = len;
111
- for (auto i = 1; i <= n; i++) { // fitted value at i
112
+ for (size_t i = 1; i <= n; i++) { // fitted value at i
112
113
  if (i > nsh && nright != n) {
113
114
  nleft += 1;
114
115
  nright += 1;
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
120
121
  }
121
122
  } else { // newnj greater than one, len less than n
122
123
  auto nsh = (len + 1) / 2;
123
- for (auto i = 1; i <= n; i += newnj) { // fitted value at i
124
+ for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
124
125
  if (i < nsh) {
125
126
  nleft = 1;
126
127
  nright = len;
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
139
140
  }
140
141
 
141
142
  if (newnj != 1) {
142
- for (auto i = 1; i <= n - newnj; i += newnj) {
143
+ for (size_t i = 1; i <= n - newnj; i += newnj) {
143
144
  auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
144
145
  for (auto j = i + 1; j <= i + newnj - 1; j++) {
145
146
  ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
167
168
  auto v = 0.0;
168
169
 
169
170
  // get the first average
170
- for (auto i = 0; i < len; i++) {
171
+ for (size_t i = 0; i < len; i++) {
171
172
  v += x[i];
172
173
  }
173
174
 
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
175
176
  if (newn > 1) {
176
177
  auto k = len;
177
178
  auto m = 0;
178
- for (auto j = 1; j < newn; j++) {
179
+ for (size_t j = 1; j < newn; j++) {
179
180
  // window down the array
180
181
  v = v - x[m] + x[k];
181
182
  ave[j] = v / flen;
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
192
193
  }
193
194
 
194
195
  void rwts(const float* y, size_t n, const float* fit, float* rw) {
195
- for (auto i = 0; i < n; i++) {
196
+ for (size_t i = 0; i < n; i++) {
196
197
  rw[i] = fabs(y[i] - fit[i]);
197
198
  }
198
199
 
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
206
207
  auto c9 = 0.999 * cmad;
207
208
  auto c1 = 0.001 * cmad;
208
209
 
209
- for (auto i = 0; i < n; i++) {
210
+ for (size_t i = 0; i < n; i++) {
210
211
  auto r = fabs(y[i] - fit[i]);
211
212
  if (r <= c1) {
212
213
  rw[i] = 1.0;
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
219
220
  }
220
221
 
221
222
  void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
222
- for (auto j = 1; j <= np; j++) {
223
- auto k = (n - j) / np + 1;
223
+ for (size_t j = 1; j <= np; j++) {
224
+ size_t k = (n - j) / np + 1;
224
225
 
225
- for (auto i = 1; i <= k; i++) {
226
+ for (size_t i = 1; i <= k; i++) {
226
227
  work1[i - 1] = y[(i - 1) * np + j - 1];
227
228
  }
228
229
  if (userw) {
229
- for (auto i = 1; i <= k; i++) {
230
+ for (size_t i = 1; i <= k; i++) {
230
231
  work3[i - 1] = rw[(i - 1) * np + j - 1];
231
232
  }
232
233
  }
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
243
244
  if (!ok) {
244
245
  work2[k + 1] = work2[k];
245
246
  }
246
- for (auto m = 1; m <= k + 2; m++) {
247
+ for (size_t m = 1; m <= k + 2; m++) {
247
248
  season[(m - 1) * np + j - 1] = work2[m - 1];
248
249
  }
249
250
  }
250
251
  }
251
252
 
252
253
  void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
253
- for (auto j = 0; j < ni; j++) {
254
- for (auto i = 0; i < n; i++) {
254
+ for (size_t j = 0; j < ni; j++) {
255
+ for (size_t i = 0; i < n; i++) {
255
256
  work1[i] = y[i] - trend[i];
256
257
  }
257
258
 
258
259
  ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
259
260
  fts(work2, n + 2 * np, np, work3, work1);
260
261
  ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
261
- for (auto i = 0; i < n; i++) {
262
+ for (size_t i = 0; i < n; i++) {
262
263
  season[i] = work2[np + i] - work1[i];
263
264
  }
264
- for (auto i = 0; i < n; i++) {
265
+ for (size_t i = 0; i < n; i++) {
265
266
  work1[i] = y[i] - season[i];
266
267
  }
267
268
  ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
269
270
  }
270
271
 
271
272
  void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
273
+ if (ns < 3) {
274
+ throw std::invalid_argument("seasonal_length must be at least 3");
275
+ }
276
+ if (nt < 3) {
277
+ throw std::invalid_argument("trend_length must be at least 3");
278
+ }
279
+ if (nl < 3) {
280
+ throw std::invalid_argument("low_pass_length must be at least 3");
281
+ }
282
+ if (np < 2) {
283
+ throw std::invalid_argument("period must be at least 2");
284
+ }
285
+
286
+ if (isdeg != 0 && isdeg != 1) {
287
+ throw std::invalid_argument("seasonal_degree must be 0 or 1");
288
+ }
289
+ if (itdeg != 0 && itdeg != 1) {
290
+ throw std::invalid_argument("trend_degree must be 0 or 1");
291
+ }
292
+ if (ildeg != 0 && ildeg != 1) {
293
+ throw std::invalid_argument("low_pass_degree must be 0 or 1");
294
+ }
295
+
296
+ if (ns % 2 != 1) {
297
+ throw std::invalid_argument("seasonal_length must be odd");
298
+ }
299
+ if (nt % 2 != 1) {
300
+ throw std::invalid_argument("trend_length must be odd");
301
+ }
302
+ if (nl % 2 != 1) {
303
+ throw std::invalid_argument("low_pass_length must be odd");
304
+ }
305
+
272
306
  auto work1 = std::vector<float>(n + 2 * np);
273
307
  auto work2 = std::vector<float>(n + 2 * np);
274
308
  auto work3 = std::vector<float>(n + 2 * np);
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
276
310
  auto work5 = std::vector<float>(n + 2 * np);
277
311
 
278
312
  auto userw = false;
279
- auto k = 0;
280
-
281
- assert(ns >= 3);
282
- assert(nt >= 3);
283
- assert(nl >= 3);
284
- assert(np >= 2);
285
-
286
- assert(isdeg == 0 || isdeg == 1);
287
- assert(itdeg == 0 || itdeg == 1);
288
- assert(ildeg == 0 || ildeg == 1);
289
-
290
- assert(ns % 2 == 1);
291
- assert(nt % 2 == 1);
292
- assert(nl % 2 == 1);
313
+ size_t k = 0;
293
314
 
294
315
  while (true) {
295
316
  onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
297
318
  if (k > no) {
298
319
  break;
299
320
  }
300
- for (auto i = 0; i < n; i++) {
321
+ for (size_t i = 0; i < n; i++) {
301
322
  work1[i] = trend[i] + season[i];
302
323
  }
303
324
  rwts(y, n, work1.data(), rw);
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
305
326
  }
306
327
 
307
328
  if (no <= 0) {
308
- for (auto i = 0; i < n; i++) {
329
+ for (size_t i = 0; i < n; i++) {
309
330
  rw[i] = 1.0;
310
331
  }
311
332
  }
312
333
  }
313
334
 
335
+ float var(const std::vector<float>& series) {
336
+ auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
337
+ std::vector<float> tmp;
338
+ tmp.reserve(series.size());
339
+ for (auto v : series) {
340
+ tmp.push_back(pow(v - mean, 2));
341
+ }
342
+ return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
343
+ }
344
+
314
345
  class StlResult {
315
346
  public:
316
347
  std::vector<float> seasonal;
317
348
  std::vector<float> trend;
318
349
  std::vector<float> remainder;
319
350
  std::vector<float> weights;
351
+
352
+ inline float seasonal_strength() {
353
+ std::vector<float> sr;
354
+ sr.reserve(remainder.size());
355
+ for (size_t i = 0; i < remainder.size(); i++) {
356
+ sr.push_back(seasonal[i] + remainder[i]);
357
+ }
358
+ return std::max(0.0, 1.0 - var(remainder) / var(sr));
359
+ }
360
+
361
+ inline float trend_strength() {
362
+ std::vector<float> tr;
363
+ tr.reserve(remainder.size());
364
+ for (size_t i = 0; i < remainder.size(); i++) {
365
+ tr.push_back(trend[i] + remainder[i]);
366
+ }
367
+ return std::max(0.0, 1.0 - var(remainder) / var(tr));
368
+ }
320
369
  };
321
370
 
322
371
  class StlParams {
@@ -337,62 +386,62 @@ public:
337
386
  inline StlParams seasonal_length(size_t ns) {
338
387
  this->ns_ = ns;
339
388
  return *this;
340
- };
389
+ }
341
390
 
342
391
  inline StlParams trend_length(size_t nt) {
343
392
  this->nt_ = nt;
344
393
  return *this;
345
- };
394
+ }
346
395
 
347
396
  inline StlParams low_pass_length(size_t nl) {
348
397
  this->nl_ = nl;
349
398
  return *this;
350
- };
399
+ }
351
400
 
352
401
  inline StlParams seasonal_degree(int isdeg) {
353
402
  this->isdeg_ = isdeg;
354
403
  return *this;
355
- };
404
+ }
356
405
 
357
406
  inline StlParams trend_degree(int itdeg) {
358
407
  this->itdeg_ = itdeg;
359
408
  return *this;
360
- };
409
+ }
361
410
 
362
411
  inline StlParams low_pass_degree(int ildeg) {
363
412
  this->ildeg_ = ildeg;
364
413
  return *this;
365
- };
414
+ }
366
415
 
367
416
  inline StlParams seasonal_jump(size_t nsjump) {
368
417
  this->nsjump_ = nsjump;
369
418
  return *this;
370
- };
419
+ }
371
420
 
372
421
  inline StlParams trend_jump(size_t ntjump) {
373
422
  this->ntjump_ = ntjump;
374
423
  return *this;
375
- };
424
+ }
376
425
 
377
426
  inline StlParams low_pass_jump(size_t nljump) {
378
427
  this->nljump_ = nljump;
379
428
  return *this;
380
- };
429
+ }
381
430
 
382
431
  inline StlParams inner_loops(bool ni) {
383
432
  this->ni_ = ni;
384
433
  return *this;
385
- };
434
+ }
386
435
 
387
436
  inline StlParams outer_loops(bool no) {
388
437
  this->no_ = no;
389
438
  return *this;
390
- };
439
+ }
391
440
 
392
441
  inline StlParams robust(bool robust) {
393
442
  this->robust_ = robust;
394
443
  return *this;
395
- };
444
+ }
396
445
 
397
446
  StlResult fit(const float* y, size_t n, size_t np);
398
447
  StlResult fit(const std::vector<float>& y, size_t np);
@@ -403,6 +452,10 @@ StlParams params() {
403
452
  }
404
453
 
405
454
  StlResult StlParams::fit(const float* y, size_t n, size_t np) {
455
+ if (n < 2 * np) {
456
+ throw std::invalid_argument("series has less than two periods");
457
+ }
458
+
406
459
  auto ns = this->ns_.value_or(np);
407
460
 
408
461
  auto isdeg = this->isdeg_;
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
444
497
  stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
445
498
 
446
499
  res.remainder.reserve(n);
447
- for (auto i = 0; i < n; i++) {
500
+ for (size_t i = 0; i < n; i++) {
448
501
  res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
449
502
  }
450
503
 
@@ -1,3 +1,3 @@
1
1
  module AnomalyDetection
2
- VERSION = "0.1.4"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,12 +1,19 @@
1
1
  # extensions
2
- require "anomaly_detection/ext"
2
+ require_relative "anomaly_detection/ext"
3
3
 
4
4
  # modules
5
- require "anomaly_detection/version"
5
+ require_relative "anomaly_detection/version"
6
6
 
7
7
  module AnomalyDetection
8
8
  class << self
9
9
  def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
10
+ if period == :auto
11
+ period = determine_period(series)
12
+ puts "Set period to #{period}" if verbose
13
+ elsif period.nil?
14
+ period = 1
15
+ end
16
+
10
17
  raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
11
18
 
12
19
  if series.is_a?(Hash)
@@ -16,6 +23,9 @@ module AnomalyDetection
16
23
  x = series
17
24
  end
18
25
 
26
+ # flush Ruby output since std::endl flushes C++ output
27
+ $stdout.flush if verbose
28
+
19
29
  res = _detect(x, period, max_anoms, alpha, direction, verbose)
20
30
  res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
21
31
  res
@@ -63,6 +73,51 @@ module AnomalyDetection
63
73
  .config(axis: {title: nil, labelFontSize: 12})
64
74
  end
65
75
 
76
+ # determine period based on time keys (experimental)
77
+ # in future, could use an approach that looks at values
78
+ # like https://stats.stackexchange.com/a/1214
79
+ def determine_period(series)
80
+ unless series.is_a?(Hash)
81
+ raise ArgumentError, "series must be a hash for :auto period"
82
+ end
83
+
84
+ times = series.keys.map(&:to_time)
85
+
86
+ second = times.all? { |t| t.nsec == 0 }
87
+ minute = second && times.all? { |t| t.sec == 0 }
88
+ hour = minute && times.all? { |t| t.min == 0 }
89
+ day = hour && times.all? { |t| t.hour == 0 }
90
+ week = day && times.map { |k| k.wday }.uniq.size == 1
91
+ month = day && times.all? { |k| k.day == 1 }
92
+ quarter = month && times.all? { |k| k.month % 3 == 1 }
93
+ year = quarter && times.all? { |k| k.month == 1 }
94
+
95
+ period =
96
+ if year
97
+ 1
98
+ elsif quarter
99
+ 4
100
+ elsif month
101
+ 12
102
+ elsif week
103
+ 52
104
+ elsif day
105
+ 7
106
+ elsif hour
107
+ 24 # or 24 * 7
108
+ elsif minute
109
+ 60 # or 60 * 24
110
+ elsif second
111
+ 60 # or 60 * 60
112
+ end
113
+
114
+ if series.size < period * 2
115
+ 1
116
+ else
117
+ period
118
+ end
119
+ end
120
+
66
121
  private
67
122
 
68
123
  def iso8601(v)
@@ -0,0 +1,15 @@
1
+ Copyright (C) 2015 Twitter, Inc and other contributors
2
+ Copyright (C) 2022 Andrew Kane
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-19 00:00:00.000000000 Z
11
+ date: 2023-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -45,6 +45,7 @@ files:
45
45
  - licenses/LICENSE-AnomalyDetection-cpp.txt
46
46
  - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
+ - licenses/NOTICE-AnomalyDetection-cpp.txt
48
49
  - licenses/UNLICENSE-dist-h.txt
49
50
  - licenses/UNLICENSE-stl-cpp.txt
50
51
  homepage: https://github.com/ankane/AnomalyDetection.rb
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
59
60
  requirements:
60
61
  - - ">="
61
62
  - !ruby/object:Gem::Version
62
- version: '2.6'
63
+ version: '2.7'
63
64
  required_rubygems_version: !ruby/object:Gem::Requirement
64
65
  requirements:
65
66
  - - ">="
66
67
  - !ruby/object:Gem::Version
67
68
  version: '0'
68
69
  requirements: []
69
- rubygems_version: 3.3.7
70
+ rubygems_version: 3.4.1
70
71
  signing_key:
71
72
  specification_version: 4
72
73
  summary: Time series anomaly detection for Ruby