anomaly_detection 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * STL C++ v0.1.0
2
+ * STL C++ v0.1.2
3
3
  * https://github.com/ankane/stl-cpp
4
4
  * Unlicense OR MIT License
5
5
  *
@@ -13,9 +13,10 @@
13
13
  #pragma once
14
14
 
15
15
  #include <algorithm>
16
- #include <cassert>
17
16
  #include <cmath>
17
+ #include <numeric>
18
18
  #include <optional>
19
+ #include <stdexcept>
19
20
  #include <vector>
20
21
 
21
22
  namespace stl {
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
91
92
  return;
92
93
  }
93
94
 
94
- auto nleft = 0;
95
- auto nright = 0;
95
+ size_t nleft = 0;
96
+ size_t nright = 0;
96
97
 
97
98
  auto newnj = std::min(njump, n - 1);
98
99
  if (len >= n) {
99
100
  nleft = 1;
100
101
  nright = n;
101
- for (auto i = 1; i <= n; i += newnj) {
102
+ for (size_t i = 1; i <= n; i += newnj) {
102
103
  auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
103
104
  if (!ok) {
104
105
  ys[i - 1] = y[i - 1];
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
108
109
  auto nsh = (len + 1) / 2;
109
110
  nleft = 1;
110
111
  nright = len;
111
- for (auto i = 1; i <= n; i++) { // fitted value at i
112
+ for (size_t i = 1; i <= n; i++) { // fitted value at i
112
113
  if (i > nsh && nright != n) {
113
114
  nleft += 1;
114
115
  nright += 1;
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
120
121
  }
121
122
  } else { // newnj greater than one, len less than n
122
123
  auto nsh = (len + 1) / 2;
123
- for (auto i = 1; i <= n; i += newnj) { // fitted value at i
124
+ for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
124
125
  if (i < nsh) {
125
126
  nleft = 1;
126
127
  nright = len;
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
139
140
  }
140
141
 
141
142
  if (newnj != 1) {
142
- for (auto i = 1; i <= n - newnj; i += newnj) {
143
+ for (size_t i = 1; i <= n - newnj; i += newnj) {
143
144
  auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
144
145
  for (auto j = i + 1; j <= i + newnj - 1; j++) {
145
146
  ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
167
168
  auto v = 0.0;
168
169
 
169
170
  // get the first average
170
- for (auto i = 0; i < len; i++) {
171
+ for (size_t i = 0; i < len; i++) {
171
172
  v += x[i];
172
173
  }
173
174
 
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
175
176
  if (newn > 1) {
176
177
  auto k = len;
177
178
  auto m = 0;
178
- for (auto j = 1; j < newn; j++) {
179
+ for (size_t j = 1; j < newn; j++) {
179
180
  // window down the array
180
181
  v = v - x[m] + x[k];
181
182
  ave[j] = v / flen;
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
192
193
  }
193
194
 
194
195
  void rwts(const float* y, size_t n, const float* fit, float* rw) {
195
- for (auto i = 0; i < n; i++) {
196
+ for (size_t i = 0; i < n; i++) {
196
197
  rw[i] = fabs(y[i] - fit[i]);
197
198
  }
198
199
 
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
206
207
  auto c9 = 0.999 * cmad;
207
208
  auto c1 = 0.001 * cmad;
208
209
 
209
- for (auto i = 0; i < n; i++) {
210
+ for (size_t i = 0; i < n; i++) {
210
211
  auto r = fabs(y[i] - fit[i]);
211
212
  if (r <= c1) {
212
213
  rw[i] = 1.0;
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
219
220
  }
220
221
 
221
222
  void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
222
- for (auto j = 1; j <= np; j++) {
223
- auto k = (n - j) / np + 1;
223
+ for (size_t j = 1; j <= np; j++) {
224
+ size_t k = (n - j) / np + 1;
224
225
 
225
- for (auto i = 1; i <= k; i++) {
226
+ for (size_t i = 1; i <= k; i++) {
226
227
  work1[i - 1] = y[(i - 1) * np + j - 1];
227
228
  }
228
229
  if (userw) {
229
- for (auto i = 1; i <= k; i++) {
230
+ for (size_t i = 1; i <= k; i++) {
230
231
  work3[i - 1] = rw[(i - 1) * np + j - 1];
231
232
  }
232
233
  }
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
243
244
  if (!ok) {
244
245
  work2[k + 1] = work2[k];
245
246
  }
246
- for (auto m = 1; m <= k + 2; m++) {
247
+ for (size_t m = 1; m <= k + 2; m++) {
247
248
  season[(m - 1) * np + j - 1] = work2[m - 1];
248
249
  }
249
250
  }
250
251
  }
251
252
 
252
253
  void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
253
- for (auto j = 0; j < ni; j++) {
254
- for (auto i = 0; i < n; i++) {
254
+ for (size_t j = 0; j < ni; j++) {
255
+ for (size_t i = 0; i < n; i++) {
255
256
  work1[i] = y[i] - trend[i];
256
257
  }
257
258
 
258
259
  ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
259
260
  fts(work2, n + 2 * np, np, work3, work1);
260
261
  ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
261
- for (auto i = 0; i < n; i++) {
262
+ for (size_t i = 0; i < n; i++) {
262
263
  season[i] = work2[np + i] - work1[i];
263
264
  }
264
- for (auto i = 0; i < n; i++) {
265
+ for (size_t i = 0; i < n; i++) {
265
266
  work1[i] = y[i] - season[i];
266
267
  }
267
268
  ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
269
270
  }
270
271
 
271
272
  void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
273
+ if (ns < 3) {
274
+ throw std::invalid_argument("seasonal_length must be at least 3");
275
+ }
276
+ if (nt < 3) {
277
+ throw std::invalid_argument("trend_length must be at least 3");
278
+ }
279
+ if (nl < 3) {
280
+ throw std::invalid_argument("low_pass_length must be at least 3");
281
+ }
282
+ if (np < 2) {
283
+ throw std::invalid_argument("period must be at least 2");
284
+ }
285
+
286
+ if (isdeg != 0 && isdeg != 1) {
287
+ throw std::invalid_argument("seasonal_degree must be 0 or 1");
288
+ }
289
+ if (itdeg != 0 && itdeg != 1) {
290
+ throw std::invalid_argument("trend_degree must be 0 or 1");
291
+ }
292
+ if (ildeg != 0 && ildeg != 1) {
293
+ throw std::invalid_argument("low_pass_degree must be 0 or 1");
294
+ }
295
+
296
+ if (ns % 2 != 1) {
297
+ throw std::invalid_argument("seasonal_length must be odd");
298
+ }
299
+ if (nt % 2 != 1) {
300
+ throw std::invalid_argument("trend_length must be odd");
301
+ }
302
+ if (nl % 2 != 1) {
303
+ throw std::invalid_argument("low_pass_length must be odd");
304
+ }
305
+
272
306
  auto work1 = std::vector<float>(n + 2 * np);
273
307
  auto work2 = std::vector<float>(n + 2 * np);
274
308
  auto work3 = std::vector<float>(n + 2 * np);
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
276
310
  auto work5 = std::vector<float>(n + 2 * np);
277
311
 
278
312
  auto userw = false;
279
- auto k = 0;
280
-
281
- assert(ns >= 3);
282
- assert(nt >= 3);
283
- assert(nl >= 3);
284
- assert(np >= 2);
285
-
286
- assert(isdeg == 0 || isdeg == 1);
287
- assert(itdeg == 0 || itdeg == 1);
288
- assert(ildeg == 0 || ildeg == 1);
289
-
290
- assert(ns % 2 == 1);
291
- assert(nt % 2 == 1);
292
- assert(nl % 2 == 1);
313
+ size_t k = 0;
293
314
 
294
315
  while (true) {
295
316
  onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
297
318
  if (k > no) {
298
319
  break;
299
320
  }
300
- for (auto i = 0; i < n; i++) {
321
+ for (size_t i = 0; i < n; i++) {
301
322
  work1[i] = trend[i] + season[i];
302
323
  }
303
324
  rwts(y, n, work1.data(), rw);
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
305
326
  }
306
327
 
307
328
  if (no <= 0) {
308
- for (auto i = 0; i < n; i++) {
329
+ for (size_t i = 0; i < n; i++) {
309
330
  rw[i] = 1.0;
310
331
  }
311
332
  }
312
333
  }
313
334
 
335
+ float var(const std::vector<float>& series) {
336
+ auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
337
+ std::vector<float> tmp;
338
+ tmp.reserve(series.size());
339
+ for (auto v : series) {
340
+ tmp.push_back(pow(v - mean, 2));
341
+ }
342
+ return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
343
+ }
344
+
314
345
  class StlResult {
315
346
  public:
316
347
  std::vector<float> seasonal;
317
348
  std::vector<float> trend;
318
349
  std::vector<float> remainder;
319
350
  std::vector<float> weights;
351
+
352
+ inline float seasonal_strength() {
353
+ std::vector<float> sr;
354
+ sr.reserve(remainder.size());
355
+ for (size_t i = 0; i < remainder.size(); i++) {
356
+ sr.push_back(seasonal[i] + remainder[i]);
357
+ }
358
+ return std::max(0.0, 1.0 - var(remainder) / var(sr));
359
+ }
360
+
361
+ inline float trend_strength() {
362
+ std::vector<float> tr;
363
+ tr.reserve(remainder.size());
364
+ for (size_t i = 0; i < remainder.size(); i++) {
365
+ tr.push_back(trend[i] + remainder[i]);
366
+ }
367
+ return std::max(0.0, 1.0 - var(remainder) / var(tr));
368
+ }
320
369
  };
321
370
 
322
371
  class StlParams {
@@ -337,62 +386,62 @@ public:
337
386
  inline StlParams seasonal_length(size_t ns) {
338
387
  this->ns_ = ns;
339
388
  return *this;
340
- };
389
+ }
341
390
 
342
391
  inline StlParams trend_length(size_t nt) {
343
392
  this->nt_ = nt;
344
393
  return *this;
345
- };
394
+ }
346
395
 
347
396
  inline StlParams low_pass_length(size_t nl) {
348
397
  this->nl_ = nl;
349
398
  return *this;
350
- };
399
+ }
351
400
 
352
401
  inline StlParams seasonal_degree(int isdeg) {
353
402
  this->isdeg_ = isdeg;
354
403
  return *this;
355
- };
404
+ }
356
405
 
357
406
  inline StlParams trend_degree(int itdeg) {
358
407
  this->itdeg_ = itdeg;
359
408
  return *this;
360
- };
409
+ }
361
410
 
362
411
  inline StlParams low_pass_degree(int ildeg) {
363
412
  this->ildeg_ = ildeg;
364
413
  return *this;
365
- };
414
+ }
366
415
 
367
416
  inline StlParams seasonal_jump(size_t nsjump) {
368
417
  this->nsjump_ = nsjump;
369
418
  return *this;
370
- };
419
+ }
371
420
 
372
421
  inline StlParams trend_jump(size_t ntjump) {
373
422
  this->ntjump_ = ntjump;
374
423
  return *this;
375
- };
424
+ }
376
425
 
377
426
  inline StlParams low_pass_jump(size_t nljump) {
378
427
  this->nljump_ = nljump;
379
428
  return *this;
380
- };
429
+ }
381
430
 
382
431
  inline StlParams inner_loops(bool ni) {
383
432
  this->ni_ = ni;
384
433
  return *this;
385
- };
434
+ }
386
435
 
387
436
  inline StlParams outer_loops(bool no) {
388
437
  this->no_ = no;
389
438
  return *this;
390
- };
439
+ }
391
440
 
392
441
  inline StlParams robust(bool robust) {
393
442
  this->robust_ = robust;
394
443
  return *this;
395
- };
444
+ }
396
445
 
397
446
  StlResult fit(const float* y, size_t n, size_t np);
398
447
  StlResult fit(const std::vector<float>& y, size_t np);
@@ -403,6 +452,10 @@ StlParams params() {
403
452
  }
404
453
 
405
454
  StlResult StlParams::fit(const float* y, size_t n, size_t np) {
455
+ if (n < 2 * np) {
456
+ throw std::invalid_argument("series has less than two periods");
457
+ }
458
+
406
459
  auto ns = this->ns_.value_or(np);
407
460
 
408
461
  auto isdeg = this->isdeg_;
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
444
497
  stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
445
498
 
446
499
  res.remainder.reserve(n);
447
- for (auto i = 0; i < n; i++) {
500
+ for (size_t i = 0; i < n; i++) {
448
501
  res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
449
502
  }
450
503
 
@@ -1,3 +1,3 @@
1
1
  module AnomalyDetection
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,12 +1,19 @@
1
1
  # extensions
2
- require "anomaly_detection/ext"
2
+ require_relative "anomaly_detection/ext"
3
3
 
4
4
  # modules
5
- require "anomaly_detection/version"
5
+ require_relative "anomaly_detection/version"
6
6
 
7
7
  module AnomalyDetection
8
8
  class << self
9
9
  def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
10
+ if period == :auto
11
+ period = determine_period(series)
12
+ puts "Set period to #{period}" if verbose
13
+ elsif period.nil?
14
+ period = 1
15
+ end
16
+
10
17
  raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
11
18
 
12
19
  if series.is_a?(Hash)
@@ -16,6 +23,9 @@ module AnomalyDetection
16
23
  x = series
17
24
  end
18
25
 
26
+ # flush Ruby output since std::endl flushes C++ output
27
+ $stdout.flush if verbose
28
+
19
29
  res = _detect(x, period, max_anoms, alpha, direction, verbose)
20
30
  res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
21
31
  res
@@ -63,6 +73,51 @@ module AnomalyDetection
63
73
  .config(axis: {title: nil, labelFontSize: 12})
64
74
  end
65
75
 
76
+ # determine period based on time keys (experimental)
77
+ # in future, could use an approach that looks at values
78
+ # like https://stats.stackexchange.com/a/1214
79
+ def determine_period(series)
80
+ unless series.is_a?(Hash)
81
+ raise ArgumentError, "series must be a hash for :auto period"
82
+ end
83
+
84
+ times = series.keys.map(&:to_time)
85
+
86
+ second = times.all? { |t| t.nsec == 0 }
87
+ minute = second && times.all? { |t| t.sec == 0 }
88
+ hour = minute && times.all? { |t| t.min == 0 }
89
+ day = hour && times.all? { |t| t.hour == 0 }
90
+ week = day && times.map { |k| k.wday }.uniq.size == 1
91
+ month = day && times.all? { |k| k.day == 1 }
92
+ quarter = month && times.all? { |k| k.month % 3 == 1 }
93
+ year = quarter && times.all? { |k| k.month == 1 }
94
+
95
+ period =
96
+ if year
97
+ 1
98
+ elsif quarter
99
+ 4
100
+ elsif month
101
+ 12
102
+ elsif week
103
+ 52
104
+ elsif day
105
+ 7
106
+ elsif hour
107
+ 24 # or 24 * 7
108
+ elsif minute
109
+ 60 # or 60 * 24
110
+ elsif second
111
+ 60 # or 60 * 60
112
+ end
113
+
114
+ if series.size < period * 2
115
+ 1
116
+ else
117
+ period
118
+ end
119
+ end
120
+
66
121
  private
67
122
 
68
123
  def iso8601(v)