anomaly_detection 0.1.4 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c60bb6d75cb8523ecd0926f391d79413a1cb2eb131cd579fd381bb6683f82da3
4
- data.tar.gz: '01594d0f0a97ad8cbb7b0b50cb30894bd0d773d4db45b3158345567ce1732efb'
3
+ metadata.gz: da5eb71023f77a4c05e6322c020ef602e8e22b7b5ba516fce99679af702c881d
4
+ data.tar.gz: 26560c8dd893c491bd3094202ff82ae33eefdcdba74fe4386b006f7f522906df
5
5
  SHA512:
6
- metadata.gz: fe09cc140a5d6543f3b00983a754861f6a3a3a436f8a8afecc80d202f1112bb6ea180df794072ee4711c044508a559a015c885cfd61d2c5be9378fc7b6590d96
7
- data.tar.gz: 5616e6075888b4521355e6c0fb33f7a94361c971c7f58f9ae6a61e5d8529a3e1938deba10be22c24ec5849f1909f48cba07b97fd1e6ea8b47ae4f66626eb703e
6
+ metadata.gz: ec2e1459ca2410ee6ab1bce3fe9c528d6419b75e10c6448f1fe5b3030a2e3d8de320a23a9bded17702a01fd23d112007b909c8611e2da6c1ff4f8521352c89ac
7
+ data.tar.gz: ad150705d6e32a111c3bc044ef7f99910beebe572e07799719e72118422b7a9e6439943cac8b86d613537d7d0fb52cba86a668faf78d135abc888ce3737f8104
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.0 (2023-01-31)
2
+
3
+ - Added experimental support for auto-detecting period
4
+ - Fixed result when no seasonality (period is less than 2)
5
+ - Dropped support for Ruby < 2.7
6
+
1
7
  ## 0.1.4 (2022-03-19)
2
8
 
3
9
  - Fixed initial median calculation
data/NOTICE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (C) 2015 Twitter, Inc and other contributors
2
- Copyright (C) 2021 Andrew Kane
2
+ Copyright (C) 2021-2023 Andrew Kane
3
3
 
4
4
  This program is free software: you can redistribute it and/or modify
5
5
  it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * AnomalyDetection.cpp v0.1.0
2
+ * AnomalyDetection.cpp v0.1.3
3
3
  * https://github.com/ankane/AnomalyDetection.cpp
4
4
  * GPL-3.0-or-later License
5
5
  */
@@ -39,7 +39,7 @@ float mad(const std::vector<float>& data, float med) {
39
39
  return 1.4826 * median_sorted(res);
40
40
  }
41
41
 
42
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
42
+ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
43
43
  auto n = data.size();
44
44
 
45
45
  // Check to make sure we have at least two periods worth of data for anomaly context
@@ -53,15 +53,22 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
53
53
  throw std::invalid_argument("series contains NANs");
54
54
  }
55
55
 
56
- // Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
57
- auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
58
- auto seasonal = data_decomp.seasonal;
59
-
60
56
  std::vector<float> data2;
61
57
  data2.reserve(n);
62
58
  auto med = median(data);
63
- for (auto i = 0; i < n; i++) {
64
- data2.push_back(data[i] - seasonal[i] - med);
59
+
60
+ if (num_obs_per_period > 1) {
61
+ // Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
62
+ auto data_decomp = stl::params().robust(true).seasonal_length(data.size() * 10 + 1).fit(data, num_obs_per_period);
63
+ auto seasonal = data_decomp.seasonal;
64
+
65
+ for (size_t i = 0; i < n; i++) {
66
+ data2.push_back(data[i] - seasonal[i] - med);
67
+ }
68
+ } else {
69
+ for (size_t i = 0; i < n; i++) {
70
+ data2.push_back(data[i] - med);
71
+ }
65
72
  }
66
73
 
67
74
  auto num_anoms = 0;
@@ -1,12 +1,11 @@
1
1
  /*!
2
- * dist.h v0.1.1
2
+ * dist.h v0.3.0
3
3
  * https://github.com/ankane/dist.h
4
4
  * Unlicense OR MIT License
5
5
  */
6
6
 
7
7
  #pragma once
8
8
 
9
- #include <assert.h>
10
9
  #include <math.h>
11
10
 
12
11
  #ifdef M_E
@@ -21,53 +20,77 @@
21
20
  #define DIST_PI 3.14159265358979323846
22
21
  #endif
23
22
 
24
- // Winitzki, S. (2008).
25
- // A handy approximation for the error function and its inverse.
26
- // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
27
- // from https://sites.google.com/site/winitzki
28
- double erf(double x) {
29
- double sign = x < 0 ? -1.0 : 1.0;
30
- x = x < 0 ? -x : x;
31
-
32
- double a = 0.14;
33
- double x2 = x * x;
34
- return sign * sqrt(1.0 - exp(-x2 * (4.0 / DIST_PI + a * x2) / (1.0 + a * x2)));
35
- }
36
-
37
- // Winitzki, S. (2008).
38
- // A handy approximation for the error function and its inverse.
39
- // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
40
- // from https://sites.google.com/site/winitzki
41
- double inverse_erf(double x) {
42
- double sign = x < 0 ? -1.0 : 1.0;
43
- x = x < 0 ? -x : x;
44
-
45
- double a = 0.147;
46
- double ln = log(1.0 - x * x);
47
- double f1 = 2.0 / (DIST_PI * a);
48
- double f2 = ln / 2.0;
49
- double f3 = f1 + f2;
50
- double f4 = 1.0 / a * ln;
51
- return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
52
- }
23
+ #ifdef M_SQRT2
24
+ #define DIST_SQRT2 M_SQRT2
25
+ #else
26
+ #define DIST_SQRT2 1.41421356237309504880
27
+ #endif
53
28
 
54
29
  double normal_pdf(double x, double mean, double std_dev) {
55
- double var = std_dev * std_dev;
56
- return (1.0 / (var * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * pow((x - mean) / var, 2));
30
+ if (std_dev <= 0) {
31
+ return NAN;
32
+ }
33
+
34
+ double n = (x - mean) / std_dev;
35
+ return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
57
36
  }
58
37
 
59
38
  double normal_cdf(double x, double mean, double std_dev) {
60
- return 0.5 * (1.0 + erf((x - mean) / (std_dev * std_dev * sqrt(2))));
39
+ if (std_dev <= 0) {
40
+ return NAN;
41
+ }
42
+
43
+ return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
61
44
  }
62
45
 
46
+ // Wichura, M. J. (1988).
47
+ // Algorithm AS 241: The Percentage Points of the Normal Distribution.
48
+ // Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
63
49
  double normal_ppf(double p, double mean, double std_dev) {
64
- assert(p >= 0 && p <= 1);
50
+ if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
51
+ return NAN;
52
+ }
53
+
54
+ if (p == 0) {
55
+ return -INFINITY;
56
+ }
65
57
 
66
- return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
58
+ if (p == 1) {
59
+ return INFINITY;
60
+ }
61
+
62
+ double q = p - 0.5;
63
+ if (fabs(q) < 0.425) {
64
+ double r = 0.180625 - q * q;
65
+ return mean + std_dev * q *
66
+ (((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) * r + 3.3871328727963666080e0) /
67
+ (((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1);
68
+ } else {
69
+ double r = q < 0 ? p : 1 - p;
70
+ r = sqrt(-log(r));
71
+ double sign = q < 0 ? -1 : 1;
72
+ if (r < 5) {
73
+ r -= 1.6;
74
+ return mean + std_dev * sign *
75
+ (((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + 2.41780725177450611770e-1) * r + 1.27045825245236838258e0) * r + 3.64784832476320460504e0) * r + 5.76949722146069140550e0) * r + 4.63033784615654529590e0) * r + 1.42343711074968357734e0) /
76
+ (((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + 1.51986665636164571966e-2) * r + 1.48103976427480074590e-1) * r + 6.89767334985100004550e-1) * r + 1.67638483018380384940e0) * r + 2.05319162663775882187e0) * r + 1);
77
+ } else {
78
+ r -= 5;
79
+ return mean + std_dev * sign *
80
+ (((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + 1.24266094738807843860e-3) * r + 2.65321895265761230930e-2) * r + 2.96560571828504891230e-1) * r + 1.78482653991729133580e0) * r + 5.46378491116411436990e0) * r + 6.65790464350110377720e0) /
81
+ (((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + 1.84631831751005468180e-5) * r + 7.86869131145613259100e-4) * r + 1.48753612908506148525e-2) * r + 1.36929880922735805310e-1) * r + 5.99832206555887937690e-1) * r + 1);
82
+ }
83
+ }
67
84
  }
68
85
 
69
- double students_t_pdf(double x, unsigned int n) {
70
- assert(n >= 1);
86
+ double students_t_pdf(double x, double n) {
87
+ if (n <= 0) {
88
+ return NAN;
89
+ }
90
+
91
+ if (n == INFINITY) {
92
+ return normal_pdf(x, 0, 1);
93
+ }
71
94
 
72
95
  return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
73
96
  }
@@ -75,8 +98,22 @@ double students_t_pdf(double x, unsigned int n) {
75
98
  // Hill, G. W. (1970).
76
99
  // Algorithm 395: Student's t-distribution.
77
100
  // Communications of the ACM, 13(10), 617-619.
78
- double students_t_cdf(double x, unsigned int n) {
79
- assert(n >= 1);
101
+ double students_t_cdf(double x, double n) {
102
+ if (n < 1) {
103
+ return NAN;
104
+ }
105
+
106
+ if (isnan(x)) {
107
+ return NAN;
108
+ }
109
+
110
+ if (!isfinite(x)) {
111
+ return x < 0 ? 0 : 1;
112
+ }
113
+
114
+ if (n == INFINITY) {
115
+ return normal_cdf(x, 0, 1);
116
+ }
80
117
 
81
118
  double start = x < 0 ? 0 : 1;
82
119
  double sign = x < 0 ? 1 : -1;
@@ -86,7 +123,7 @@ double students_t_cdf(double x, unsigned int n) {
86
123
  double y = t / n;
87
124
  double b = 1.0 + y;
88
125
 
89
- if ((n >= 20 && t < n) || n > 200) {
126
+ if (n > floor(n) || (n >= 20 && t < n) || n > 200) {
90
127
  // asymptotic series for large or noninteger n
91
128
  if (y > 10e-6) {
92
129
  y = log(b);
@@ -98,6 +135,10 @@ double students_t_cdf(double x, unsigned int n) {
98
135
  return start + sign * normal_cdf(-y, 0.0, 1.0);
99
136
  }
100
137
 
138
+ // convert n to an integer
139
+ // n is an integer between 1 and 200 if execution reaches this point
140
+ n = (int) n;
141
+
101
142
  if (n < 20 && t < 4.0) {
102
143
  // nested summation of cosine series
103
144
  y = sqrt(y);
@@ -144,9 +185,14 @@ double students_t_cdf(double x, unsigned int n) {
144
185
  // Hill, G. W. (1970).
145
186
  // Algorithm 396: Student's t-quantiles.
146
187
  // Communications of the ACM, 13(10), 619-620.
147
- double students_t_ppf(double p, unsigned int n) {
148
- assert(p >= 0 && p <= 1);
149
- assert(n >= 1);
188
+ double students_t_ppf(double p, double n) {
189
+ if (p < 0 || p > 1 || n < 1) {
190
+ return NAN;
191
+ }
192
+
193
+ if (n == INFINITY) {
194
+ return normal_ppf(p, 0, 1);
195
+ }
150
196
 
151
197
  // distribution is symmetric
152
198
  double sign = p < 0.5 ? -1 : 1;
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * STL C++ v0.1.0
2
+ * STL C++ v0.1.2
3
3
  * https://github.com/ankane/stl-cpp
4
4
  * Unlicense OR MIT License
5
5
  *
@@ -13,9 +13,10 @@
13
13
  #pragma once
14
14
 
15
15
  #include <algorithm>
16
- #include <cassert>
17
16
  #include <cmath>
17
+ #include <numeric>
18
18
  #include <optional>
19
+ #include <stdexcept>
19
20
  #include <vector>
20
21
 
21
22
  namespace stl {
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
91
92
  return;
92
93
  }
93
94
 
94
- auto nleft = 0;
95
- auto nright = 0;
95
+ size_t nleft = 0;
96
+ size_t nright = 0;
96
97
 
97
98
  auto newnj = std::min(njump, n - 1);
98
99
  if (len >= n) {
99
100
  nleft = 1;
100
101
  nright = n;
101
- for (auto i = 1; i <= n; i += newnj) {
102
+ for (size_t i = 1; i <= n; i += newnj) {
102
103
  auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
103
104
  if (!ok) {
104
105
  ys[i - 1] = y[i - 1];
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
108
109
  auto nsh = (len + 1) / 2;
109
110
  nleft = 1;
110
111
  nright = len;
111
- for (auto i = 1; i <= n; i++) { // fitted value at i
112
+ for (size_t i = 1; i <= n; i++) { // fitted value at i
112
113
  if (i > nsh && nright != n) {
113
114
  nleft += 1;
114
115
  nright += 1;
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
120
121
  }
121
122
  } else { // newnj greater than one, len less than n
122
123
  auto nsh = (len + 1) / 2;
123
- for (auto i = 1; i <= n; i += newnj) { // fitted value at i
124
+ for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
124
125
  if (i < nsh) {
125
126
  nleft = 1;
126
127
  nright = len;
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
139
140
  }
140
141
 
141
142
  if (newnj != 1) {
142
- for (auto i = 1; i <= n - newnj; i += newnj) {
143
+ for (size_t i = 1; i <= n - newnj; i += newnj) {
143
144
  auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
144
145
  for (auto j = i + 1; j <= i + newnj - 1; j++) {
145
146
  ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
167
168
  auto v = 0.0;
168
169
 
169
170
  // get the first average
170
- for (auto i = 0; i < len; i++) {
171
+ for (size_t i = 0; i < len; i++) {
171
172
  v += x[i];
172
173
  }
173
174
 
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
175
176
  if (newn > 1) {
176
177
  auto k = len;
177
178
  auto m = 0;
178
- for (auto j = 1; j < newn; j++) {
179
+ for (size_t j = 1; j < newn; j++) {
179
180
  // window down the array
180
181
  v = v - x[m] + x[k];
181
182
  ave[j] = v / flen;
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
192
193
  }
193
194
 
194
195
  void rwts(const float* y, size_t n, const float* fit, float* rw) {
195
- for (auto i = 0; i < n; i++) {
196
+ for (size_t i = 0; i < n; i++) {
196
197
  rw[i] = fabs(y[i] - fit[i]);
197
198
  }
198
199
 
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
206
207
  auto c9 = 0.999 * cmad;
207
208
  auto c1 = 0.001 * cmad;
208
209
 
209
- for (auto i = 0; i < n; i++) {
210
+ for (size_t i = 0; i < n; i++) {
210
211
  auto r = fabs(y[i] - fit[i]);
211
212
  if (r <= c1) {
212
213
  rw[i] = 1.0;
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
219
220
  }
220
221
 
221
222
  void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
222
- for (auto j = 1; j <= np; j++) {
223
- auto k = (n - j) / np + 1;
223
+ for (size_t j = 1; j <= np; j++) {
224
+ size_t k = (n - j) / np + 1;
224
225
 
225
- for (auto i = 1; i <= k; i++) {
226
+ for (size_t i = 1; i <= k; i++) {
226
227
  work1[i - 1] = y[(i - 1) * np + j - 1];
227
228
  }
228
229
  if (userw) {
229
- for (auto i = 1; i <= k; i++) {
230
+ for (size_t i = 1; i <= k; i++) {
230
231
  work3[i - 1] = rw[(i - 1) * np + j - 1];
231
232
  }
232
233
  }
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
243
244
  if (!ok) {
244
245
  work2[k + 1] = work2[k];
245
246
  }
246
- for (auto m = 1; m <= k + 2; m++) {
247
+ for (size_t m = 1; m <= k + 2; m++) {
247
248
  season[(m - 1) * np + j - 1] = work2[m - 1];
248
249
  }
249
250
  }
250
251
  }
251
252
 
252
253
  void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
253
- for (auto j = 0; j < ni; j++) {
254
- for (auto i = 0; i < n; i++) {
254
+ for (size_t j = 0; j < ni; j++) {
255
+ for (size_t i = 0; i < n; i++) {
255
256
  work1[i] = y[i] - trend[i];
256
257
  }
257
258
 
258
259
  ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
259
260
  fts(work2, n + 2 * np, np, work3, work1);
260
261
  ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
261
- for (auto i = 0; i < n; i++) {
262
+ for (size_t i = 0; i < n; i++) {
262
263
  season[i] = work2[np + i] - work1[i];
263
264
  }
264
- for (auto i = 0; i < n; i++) {
265
+ for (size_t i = 0; i < n; i++) {
265
266
  work1[i] = y[i] - season[i];
266
267
  }
267
268
  ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
269
270
  }
270
271
 
271
272
  void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
273
+ if (ns < 3) {
274
+ throw std::invalid_argument("seasonal_length must be at least 3");
275
+ }
276
+ if (nt < 3) {
277
+ throw std::invalid_argument("trend_length must be at least 3");
278
+ }
279
+ if (nl < 3) {
280
+ throw std::invalid_argument("low_pass_length must be at least 3");
281
+ }
282
+ if (np < 2) {
283
+ throw std::invalid_argument("period must be at least 2");
284
+ }
285
+
286
+ if (isdeg != 0 && isdeg != 1) {
287
+ throw std::invalid_argument("seasonal_degree must be 0 or 1");
288
+ }
289
+ if (itdeg != 0 && itdeg != 1) {
290
+ throw std::invalid_argument("trend_degree must be 0 or 1");
291
+ }
292
+ if (ildeg != 0 && ildeg != 1) {
293
+ throw std::invalid_argument("low_pass_degree must be 0 or 1");
294
+ }
295
+
296
+ if (ns % 2 != 1) {
297
+ throw std::invalid_argument("seasonal_length must be odd");
298
+ }
299
+ if (nt % 2 != 1) {
300
+ throw std::invalid_argument("trend_length must be odd");
301
+ }
302
+ if (nl % 2 != 1) {
303
+ throw std::invalid_argument("low_pass_length must be odd");
304
+ }
305
+
272
306
  auto work1 = std::vector<float>(n + 2 * np);
273
307
  auto work2 = std::vector<float>(n + 2 * np);
274
308
  auto work3 = std::vector<float>(n + 2 * np);
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
276
310
  auto work5 = std::vector<float>(n + 2 * np);
277
311
 
278
312
  auto userw = false;
279
- auto k = 0;
280
-
281
- assert(ns >= 3);
282
- assert(nt >= 3);
283
- assert(nl >= 3);
284
- assert(np >= 2);
285
-
286
- assert(isdeg == 0 || isdeg == 1);
287
- assert(itdeg == 0 || itdeg == 1);
288
- assert(ildeg == 0 || ildeg == 1);
289
-
290
- assert(ns % 2 == 1);
291
- assert(nt % 2 == 1);
292
- assert(nl % 2 == 1);
313
+ size_t k = 0;
293
314
 
294
315
  while (true) {
295
316
  onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
297
318
  if (k > no) {
298
319
  break;
299
320
  }
300
- for (auto i = 0; i < n; i++) {
321
+ for (size_t i = 0; i < n; i++) {
301
322
  work1[i] = trend[i] + season[i];
302
323
  }
303
324
  rwts(y, n, work1.data(), rw);
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
305
326
  }
306
327
 
307
328
  if (no <= 0) {
308
- for (auto i = 0; i < n; i++) {
329
+ for (size_t i = 0; i < n; i++) {
309
330
  rw[i] = 1.0;
310
331
  }
311
332
  }
312
333
  }
313
334
 
335
+ float var(const std::vector<float>& series) {
336
+ auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
337
+ std::vector<float> tmp;
338
+ tmp.reserve(series.size());
339
+ for (auto v : series) {
340
+ tmp.push_back(pow(v - mean, 2));
341
+ }
342
+ return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
343
+ }
344
+
314
345
  class StlResult {
315
346
  public:
316
347
  std::vector<float> seasonal;
317
348
  std::vector<float> trend;
318
349
  std::vector<float> remainder;
319
350
  std::vector<float> weights;
351
+
352
+ inline float seasonal_strength() {
353
+ std::vector<float> sr;
354
+ sr.reserve(remainder.size());
355
+ for (size_t i = 0; i < remainder.size(); i++) {
356
+ sr.push_back(seasonal[i] + remainder[i]);
357
+ }
358
+ return std::max(0.0, 1.0 - var(remainder) / var(sr));
359
+ }
360
+
361
+ inline float trend_strength() {
362
+ std::vector<float> tr;
363
+ tr.reserve(remainder.size());
364
+ for (size_t i = 0; i < remainder.size(); i++) {
365
+ tr.push_back(trend[i] + remainder[i]);
366
+ }
367
+ return std::max(0.0, 1.0 - var(remainder) / var(tr));
368
+ }
320
369
  };
321
370
 
322
371
  class StlParams {
@@ -337,62 +386,62 @@ public:
337
386
  inline StlParams seasonal_length(size_t ns) {
338
387
  this->ns_ = ns;
339
388
  return *this;
340
- };
389
+ }
341
390
 
342
391
  inline StlParams trend_length(size_t nt) {
343
392
  this->nt_ = nt;
344
393
  return *this;
345
- };
394
+ }
346
395
 
347
396
  inline StlParams low_pass_length(size_t nl) {
348
397
  this->nl_ = nl;
349
398
  return *this;
350
- };
399
+ }
351
400
 
352
401
  inline StlParams seasonal_degree(int isdeg) {
353
402
  this->isdeg_ = isdeg;
354
403
  return *this;
355
- };
404
+ }
356
405
 
357
406
  inline StlParams trend_degree(int itdeg) {
358
407
  this->itdeg_ = itdeg;
359
408
  return *this;
360
- };
409
+ }
361
410
 
362
411
  inline StlParams low_pass_degree(int ildeg) {
363
412
  this->ildeg_ = ildeg;
364
413
  return *this;
365
- };
414
+ }
366
415
 
367
416
  inline StlParams seasonal_jump(size_t nsjump) {
368
417
  this->nsjump_ = nsjump;
369
418
  return *this;
370
- };
419
+ }
371
420
 
372
421
  inline StlParams trend_jump(size_t ntjump) {
373
422
  this->ntjump_ = ntjump;
374
423
  return *this;
375
- };
424
+ }
376
425
 
377
426
  inline StlParams low_pass_jump(size_t nljump) {
378
427
  this->nljump_ = nljump;
379
428
  return *this;
380
- };
429
+ }
381
430
 
382
431
  inline StlParams inner_loops(bool ni) {
383
432
  this->ni_ = ni;
384
433
  return *this;
385
- };
434
+ }
386
435
 
387
436
  inline StlParams outer_loops(bool no) {
388
437
  this->no_ = no;
389
438
  return *this;
390
- };
439
+ }
391
440
 
392
441
  inline StlParams robust(bool robust) {
393
442
  this->robust_ = robust;
394
443
  return *this;
395
- };
444
+ }
396
445
 
397
446
  StlResult fit(const float* y, size_t n, size_t np);
398
447
  StlResult fit(const std::vector<float>& y, size_t np);
@@ -403,6 +452,10 @@ StlParams params() {
403
452
  }
404
453
 
405
454
  StlResult StlParams::fit(const float* y, size_t n, size_t np) {
455
+ if (n < 2 * np) {
456
+ throw std::invalid_argument("series has less than two periods");
457
+ }
458
+
406
459
  auto ns = this->ns_.value_or(np);
407
460
 
408
461
  auto isdeg = this->isdeg_;
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
444
497
  stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
445
498
 
446
499
  res.remainder.reserve(n);
447
- for (auto i = 0; i < n; i++) {
500
+ for (size_t i = 0; i < n; i++) {
448
501
  res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
449
502
  }
450
503
 
@@ -1,3 +1,3 @@
1
1
  module AnomalyDetection
2
- VERSION = "0.1.4"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,12 +1,19 @@
1
1
  # extensions
2
- require "anomaly_detection/ext"
2
+ require_relative "anomaly_detection/ext"
3
3
 
4
4
  # modules
5
- require "anomaly_detection/version"
5
+ require_relative "anomaly_detection/version"
6
6
 
7
7
  module AnomalyDetection
8
8
  class << self
9
9
  def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
10
+ if period == :auto
11
+ period = determine_period(series)
12
+ puts "Set period to #{period}" if verbose
13
+ elsif period.nil?
14
+ period = 1
15
+ end
16
+
10
17
  raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
11
18
 
12
19
  if series.is_a?(Hash)
@@ -16,6 +23,9 @@ module AnomalyDetection
16
23
  x = series
17
24
  end
18
25
 
26
+ # flush Ruby output since std::endl flushes C++ output
27
+ $stdout.flush if verbose
28
+
19
29
  res = _detect(x, period, max_anoms, alpha, direction, verbose)
20
30
  res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
21
31
  res
@@ -63,6 +73,51 @@ module AnomalyDetection
63
73
  .config(axis: {title: nil, labelFontSize: 12})
64
74
  end
65
75
 
76
+ # determine period based on time keys (experimental)
77
+ # in future, could use an approach that looks at values
78
+ # like https://stats.stackexchange.com/a/1214
79
+ def determine_period(series)
80
+ unless series.is_a?(Hash)
81
+ raise ArgumentError, "series must be a hash for :auto period"
82
+ end
83
+
84
+ times = series.keys.map(&:to_time)
85
+
86
+ second = times.all? { |t| t.nsec == 0 }
87
+ minute = second && times.all? { |t| t.sec == 0 }
88
+ hour = minute && times.all? { |t| t.min == 0 }
89
+ day = hour && times.all? { |t| t.hour == 0 }
90
+ week = day && times.map { |k| k.wday }.uniq.size == 1
91
+ month = day && times.all? { |k| k.day == 1 }
92
+ quarter = month && times.all? { |k| k.month % 3 == 1 }
93
+ year = quarter && times.all? { |k| k.month == 1 }
94
+
95
+ period =
96
+ if year
97
+ 1
98
+ elsif quarter
99
+ 4
100
+ elsif month
101
+ 12
102
+ elsif week
103
+ 52
104
+ elsif day
105
+ 7
106
+ elsif hour
107
+ 24 # or 24 * 7
108
+ elsif minute
109
+ 60 # or 60 * 24
110
+ elsif second
111
+ 60 # or 60 * 60
112
+ end
113
+
114
+ if series.size < period * 2
115
+ 1
116
+ else
117
+ period
118
+ end
119
+ end
120
+
66
121
  private
67
122
 
68
123
  def iso8601(v)
@@ -0,0 +1,15 @@
1
+ Copyright (C) 2015 Twitter, Inc and other contributors
2
+ Copyright (C) 2022 Andrew Kane
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-19 00:00:00.000000000 Z
11
+ date: 2023-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -45,6 +45,7 @@ files:
45
45
  - licenses/LICENSE-AnomalyDetection-cpp.txt
46
46
  - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
+ - licenses/NOTICE-AnomalyDetection-cpp.txt
48
49
  - licenses/UNLICENSE-dist-h.txt
49
50
  - licenses/UNLICENSE-stl-cpp.txt
50
51
  homepage: https://github.com/ankane/AnomalyDetection.rb
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
59
60
  requirements:
60
61
  - - ">="
61
62
  - !ruby/object:Gem::Version
62
- version: '2.6'
63
+ version: '2.7'
63
64
  required_rubygems_version: !ruby/object:Gem::Requirement
64
65
  requirements:
65
66
  - - ">="
66
67
  - !ruby/object:Gem::Version
67
68
  version: '0'
68
69
  requirements: []
69
- rubygems_version: 3.3.7
70
+ rubygems_version: 3.4.1
70
71
  signing_key:
71
72
  specification_version: 4
72
73
  summary: Time series anomaly detection for Ruby