anomaly_detection 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE.txt +1 -1
- data/README.md +2 -2
- data/ext/anomaly_detection/anomaly_detection.hpp +200 -2
- data/ext/anomaly_detection/dist.h +105 -49
- data/ext/anomaly_detection/ext.cpp +9 -3
- data/ext/anomaly_detection/stl.hpp +103 -50
- data/lib/anomaly_detection/version.rb +1 -1
- data/lib/anomaly_detection.rb +57 -2
- data/licenses/LICENSE-AnomalyDetection-cpp.txt +675 -0
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
- metadata +6 -5
- data/ext/anomaly_detection/anomaly_detection.cpp +0 -139
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* STL C++ v0.1.
|
2
|
+
* STL C++ v0.1.2
|
3
3
|
* https://github.com/ankane/stl-cpp
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*
|
@@ -13,9 +13,10 @@
|
|
13
13
|
#pragma once
|
14
14
|
|
15
15
|
#include <algorithm>
|
16
|
-
#include <cassert>
|
17
16
|
#include <cmath>
|
17
|
+
#include <numeric>
|
18
18
|
#include <optional>
|
19
|
+
#include <stdexcept>
|
19
20
|
#include <vector>
|
20
21
|
|
21
22
|
namespace stl {
|
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
91
92
|
return;
|
92
93
|
}
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
size_t nleft = 0;
|
96
|
+
size_t nright = 0;
|
96
97
|
|
97
98
|
auto newnj = std::min(njump, n - 1);
|
98
99
|
if (len >= n) {
|
99
100
|
nleft = 1;
|
100
101
|
nright = n;
|
101
|
-
for (
|
102
|
+
for (size_t i = 1; i <= n; i += newnj) {
|
102
103
|
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
103
104
|
if (!ok) {
|
104
105
|
ys[i - 1] = y[i - 1];
|
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
108
109
|
auto nsh = (len + 1) / 2;
|
109
110
|
nleft = 1;
|
110
111
|
nright = len;
|
111
|
-
for (
|
112
|
+
for (size_t i = 1; i <= n; i++) { // fitted value at i
|
112
113
|
if (i > nsh && nright != n) {
|
113
114
|
nleft += 1;
|
114
115
|
nright += 1;
|
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
120
121
|
}
|
121
122
|
} else { // newnj greater than one, len less than n
|
122
123
|
auto nsh = (len + 1) / 2;
|
123
|
-
for (
|
124
|
+
for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
|
124
125
|
if (i < nsh) {
|
125
126
|
nleft = 1;
|
126
127
|
nright = len;
|
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
139
140
|
}
|
140
141
|
|
141
142
|
if (newnj != 1) {
|
142
|
-
for (
|
143
|
+
for (size_t i = 1; i <= n - newnj; i += newnj) {
|
143
144
|
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
|
144
145
|
for (auto j = i + 1; j <= i + newnj - 1; j++) {
|
145
146
|
ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
|
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
167
168
|
auto v = 0.0;
|
168
169
|
|
169
170
|
// get the first average
|
170
|
-
for (
|
171
|
+
for (size_t i = 0; i < len; i++) {
|
171
172
|
v += x[i];
|
172
173
|
}
|
173
174
|
|
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
175
176
|
if (newn > 1) {
|
176
177
|
auto k = len;
|
177
178
|
auto m = 0;
|
178
|
-
for (
|
179
|
+
for (size_t j = 1; j < newn; j++) {
|
179
180
|
// window down the array
|
180
181
|
v = v - x[m] + x[k];
|
181
182
|
ave[j] = v / flen;
|
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
|
|
192
193
|
}
|
193
194
|
|
194
195
|
void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
195
|
-
for (
|
196
|
+
for (size_t i = 0; i < n; i++) {
|
196
197
|
rw[i] = fabs(y[i] - fit[i]);
|
197
198
|
}
|
198
199
|
|
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
206
207
|
auto c9 = 0.999 * cmad;
|
207
208
|
auto c1 = 0.001 * cmad;
|
208
209
|
|
209
|
-
for (
|
210
|
+
for (size_t i = 0; i < n; i++) {
|
210
211
|
auto r = fabs(y[i] - fit[i]);
|
211
212
|
if (r <= c1) {
|
212
213
|
rw[i] = 1.0;
|
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
219
220
|
}
|
220
221
|
|
221
222
|
void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
|
222
|
-
for (
|
223
|
-
|
223
|
+
for (size_t j = 1; j <= np; j++) {
|
224
|
+
size_t k = (n - j) / np + 1;
|
224
225
|
|
225
|
-
for (
|
226
|
+
for (size_t i = 1; i <= k; i++) {
|
226
227
|
work1[i - 1] = y[(i - 1) * np + j - 1];
|
227
228
|
}
|
228
229
|
if (userw) {
|
229
|
-
for (
|
230
|
+
for (size_t i = 1; i <= k; i++) {
|
230
231
|
work3[i - 1] = rw[(i - 1) * np + j - 1];
|
231
232
|
}
|
232
233
|
}
|
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
243
244
|
if (!ok) {
|
244
245
|
work2[k + 1] = work2[k];
|
245
246
|
}
|
246
|
-
for (
|
247
|
+
for (size_t m = 1; m <= k + 2; m++) {
|
247
248
|
season[(m - 1) * np + j - 1] = work2[m - 1];
|
248
249
|
}
|
249
250
|
}
|
250
251
|
}
|
251
252
|
|
252
253
|
void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
|
253
|
-
for (
|
254
|
-
for (
|
254
|
+
for (size_t j = 0; j < ni; j++) {
|
255
|
+
for (size_t i = 0; i < n; i++) {
|
255
256
|
work1[i] = y[i] - trend[i];
|
256
257
|
}
|
257
258
|
|
258
259
|
ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
|
259
260
|
fts(work2, n + 2 * np, np, work3, work1);
|
260
261
|
ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
|
261
|
-
for (
|
262
|
+
for (size_t i = 0; i < n; i++) {
|
262
263
|
season[i] = work2[np + i] - work1[i];
|
263
264
|
}
|
264
|
-
for (
|
265
|
+
for (size_t i = 0; i < n; i++) {
|
265
266
|
work1[i] = y[i] - season[i];
|
266
267
|
}
|
267
268
|
ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
|
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
|
|
269
270
|
}
|
270
271
|
|
271
272
|
void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
|
273
|
+
if (ns < 3) {
|
274
|
+
throw std::invalid_argument("seasonal_length must be at least 3");
|
275
|
+
}
|
276
|
+
if (nt < 3) {
|
277
|
+
throw std::invalid_argument("trend_length must be at least 3");
|
278
|
+
}
|
279
|
+
if (nl < 3) {
|
280
|
+
throw std::invalid_argument("low_pass_length must be at least 3");
|
281
|
+
}
|
282
|
+
if (np < 2) {
|
283
|
+
throw std::invalid_argument("period must be at least 2");
|
284
|
+
}
|
285
|
+
|
286
|
+
if (isdeg != 0 && isdeg != 1) {
|
287
|
+
throw std::invalid_argument("seasonal_degree must be 0 or 1");
|
288
|
+
}
|
289
|
+
if (itdeg != 0 && itdeg != 1) {
|
290
|
+
throw std::invalid_argument("trend_degree must be 0 or 1");
|
291
|
+
}
|
292
|
+
if (ildeg != 0 && ildeg != 1) {
|
293
|
+
throw std::invalid_argument("low_pass_degree must be 0 or 1");
|
294
|
+
}
|
295
|
+
|
296
|
+
if (ns % 2 != 1) {
|
297
|
+
throw std::invalid_argument("seasonal_length must be odd");
|
298
|
+
}
|
299
|
+
if (nt % 2 != 1) {
|
300
|
+
throw std::invalid_argument("trend_length must be odd");
|
301
|
+
}
|
302
|
+
if (nl % 2 != 1) {
|
303
|
+
throw std::invalid_argument("low_pass_length must be odd");
|
304
|
+
}
|
305
|
+
|
272
306
|
auto work1 = std::vector<float>(n + 2 * np);
|
273
307
|
auto work2 = std::vector<float>(n + 2 * np);
|
274
308
|
auto work3 = std::vector<float>(n + 2 * np);
|
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
276
310
|
auto work5 = std::vector<float>(n + 2 * np);
|
277
311
|
|
278
312
|
auto userw = false;
|
279
|
-
|
280
|
-
|
281
|
-
assert(ns >= 3);
|
282
|
-
assert(nt >= 3);
|
283
|
-
assert(nl >= 3);
|
284
|
-
assert(np >= 2);
|
285
|
-
|
286
|
-
assert(isdeg == 0 || isdeg == 1);
|
287
|
-
assert(itdeg == 0 || itdeg == 1);
|
288
|
-
assert(ildeg == 0 || ildeg == 1);
|
289
|
-
|
290
|
-
assert(ns % 2 == 1);
|
291
|
-
assert(nt % 2 == 1);
|
292
|
-
assert(nl % 2 == 1);
|
313
|
+
size_t k = 0;
|
293
314
|
|
294
315
|
while (true) {
|
295
316
|
onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
|
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
297
318
|
if (k > no) {
|
298
319
|
break;
|
299
320
|
}
|
300
|
-
for (
|
321
|
+
for (size_t i = 0; i < n; i++) {
|
301
322
|
work1[i] = trend[i] + season[i];
|
302
323
|
}
|
303
324
|
rwts(y, n, work1.data(), rw);
|
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
305
326
|
}
|
306
327
|
|
307
328
|
if (no <= 0) {
|
308
|
-
for (
|
329
|
+
for (size_t i = 0; i < n; i++) {
|
309
330
|
rw[i] = 1.0;
|
310
331
|
}
|
311
332
|
}
|
312
333
|
}
|
313
334
|
|
335
|
+
float var(const std::vector<float>& series) {
|
336
|
+
auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
|
337
|
+
std::vector<float> tmp;
|
338
|
+
tmp.reserve(series.size());
|
339
|
+
for (auto v : series) {
|
340
|
+
tmp.push_back(pow(v - mean, 2));
|
341
|
+
}
|
342
|
+
return std::accumulate(tmp.begin(), tmp.end(), 0.0) / (series.size() - 1);
|
343
|
+
}
|
344
|
+
|
314
345
|
class StlResult {
|
315
346
|
public:
|
316
347
|
std::vector<float> seasonal;
|
317
348
|
std::vector<float> trend;
|
318
349
|
std::vector<float> remainder;
|
319
350
|
std::vector<float> weights;
|
351
|
+
|
352
|
+
inline float seasonal_strength() {
|
353
|
+
std::vector<float> sr;
|
354
|
+
sr.reserve(remainder.size());
|
355
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
356
|
+
sr.push_back(seasonal[i] + remainder[i]);
|
357
|
+
}
|
358
|
+
return std::max(0.0, 1.0 - var(remainder) / var(sr));
|
359
|
+
}
|
360
|
+
|
361
|
+
inline float trend_strength() {
|
362
|
+
std::vector<float> tr;
|
363
|
+
tr.reserve(remainder.size());
|
364
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
365
|
+
tr.push_back(trend[i] + remainder[i]);
|
366
|
+
}
|
367
|
+
return std::max(0.0, 1.0 - var(remainder) / var(tr));
|
368
|
+
}
|
320
369
|
};
|
321
370
|
|
322
371
|
class StlParams {
|
@@ -337,62 +386,62 @@ public:
|
|
337
386
|
inline StlParams seasonal_length(size_t ns) {
|
338
387
|
this->ns_ = ns;
|
339
388
|
return *this;
|
340
|
-
}
|
389
|
+
}
|
341
390
|
|
342
391
|
inline StlParams trend_length(size_t nt) {
|
343
392
|
this->nt_ = nt;
|
344
393
|
return *this;
|
345
|
-
}
|
394
|
+
}
|
346
395
|
|
347
396
|
inline StlParams low_pass_length(size_t nl) {
|
348
397
|
this->nl_ = nl;
|
349
398
|
return *this;
|
350
|
-
}
|
399
|
+
}
|
351
400
|
|
352
401
|
inline StlParams seasonal_degree(int isdeg) {
|
353
402
|
this->isdeg_ = isdeg;
|
354
403
|
return *this;
|
355
|
-
}
|
404
|
+
}
|
356
405
|
|
357
406
|
inline StlParams trend_degree(int itdeg) {
|
358
407
|
this->itdeg_ = itdeg;
|
359
408
|
return *this;
|
360
|
-
}
|
409
|
+
}
|
361
410
|
|
362
411
|
inline StlParams low_pass_degree(int ildeg) {
|
363
412
|
this->ildeg_ = ildeg;
|
364
413
|
return *this;
|
365
|
-
}
|
414
|
+
}
|
366
415
|
|
367
416
|
inline StlParams seasonal_jump(size_t nsjump) {
|
368
417
|
this->nsjump_ = nsjump;
|
369
418
|
return *this;
|
370
|
-
}
|
419
|
+
}
|
371
420
|
|
372
421
|
inline StlParams trend_jump(size_t ntjump) {
|
373
422
|
this->ntjump_ = ntjump;
|
374
423
|
return *this;
|
375
|
-
}
|
424
|
+
}
|
376
425
|
|
377
426
|
inline StlParams low_pass_jump(size_t nljump) {
|
378
427
|
this->nljump_ = nljump;
|
379
428
|
return *this;
|
380
|
-
}
|
429
|
+
}
|
381
430
|
|
382
431
|
inline StlParams inner_loops(bool ni) {
|
383
432
|
this->ni_ = ni;
|
384
433
|
return *this;
|
385
|
-
}
|
434
|
+
}
|
386
435
|
|
387
436
|
inline StlParams outer_loops(bool no) {
|
388
437
|
this->no_ = no;
|
389
438
|
return *this;
|
390
|
-
}
|
439
|
+
}
|
391
440
|
|
392
441
|
inline StlParams robust(bool robust) {
|
393
442
|
this->robust_ = robust;
|
394
443
|
return *this;
|
395
|
-
}
|
444
|
+
}
|
396
445
|
|
397
446
|
StlResult fit(const float* y, size_t n, size_t np);
|
398
447
|
StlResult fit(const std::vector<float>& y, size_t np);
|
@@ -403,6 +452,10 @@ StlParams params() {
|
|
403
452
|
}
|
404
453
|
|
405
454
|
StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
455
|
+
if (n < 2 * np) {
|
456
|
+
throw std::invalid_argument("series has less than two periods");
|
457
|
+
}
|
458
|
+
|
406
459
|
auto ns = this->ns_.value_or(np);
|
407
460
|
|
408
461
|
auto isdeg = this->isdeg_;
|
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
444
497
|
stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
|
445
498
|
|
446
499
|
res.remainder.reserve(n);
|
447
|
-
for (
|
500
|
+
for (size_t i = 0; i < n; i++) {
|
448
501
|
res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
|
449
502
|
}
|
450
503
|
|
data/lib/anomaly_detection.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# extensions
|
2
|
-
|
2
|
+
require_relative "anomaly_detection/ext"
|
3
3
|
|
4
4
|
# modules
|
5
|
-
|
5
|
+
require_relative "anomaly_detection/version"
|
6
6
|
|
7
7
|
module AnomalyDetection
|
8
8
|
class << self
|
9
9
|
def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
|
10
|
+
if period == :auto
|
11
|
+
period = determine_period(series)
|
12
|
+
puts "Set period to #{period}" if verbose
|
13
|
+
elsif period.nil?
|
14
|
+
period = 1
|
15
|
+
end
|
16
|
+
|
10
17
|
raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
|
11
18
|
|
12
19
|
if series.is_a?(Hash)
|
@@ -16,6 +23,9 @@ module AnomalyDetection
|
|
16
23
|
x = series
|
17
24
|
end
|
18
25
|
|
26
|
+
# flush Ruby output since std::endl flushes C++ output
|
27
|
+
$stdout.flush if verbose
|
28
|
+
|
19
29
|
res = _detect(x, period, max_anoms, alpha, direction, verbose)
|
20
30
|
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
21
31
|
res
|
@@ -63,6 +73,51 @@ module AnomalyDetection
|
|
63
73
|
.config(axis: {title: nil, labelFontSize: 12})
|
64
74
|
end
|
65
75
|
|
76
|
+
# determine period based on time keys (experimental)
|
77
|
+
# in future, could use an approach that looks at values
|
78
|
+
# like https://stats.stackexchange.com/a/1214
|
79
|
+
def determine_period(series)
|
80
|
+
unless series.is_a?(Hash)
|
81
|
+
raise ArgumentError, "series must be a hash for :auto period"
|
82
|
+
end
|
83
|
+
|
84
|
+
times = series.keys.map(&:to_time)
|
85
|
+
|
86
|
+
second = times.all? { |t| t.nsec == 0 }
|
87
|
+
minute = second && times.all? { |t| t.sec == 0 }
|
88
|
+
hour = minute && times.all? { |t| t.min == 0 }
|
89
|
+
day = hour && times.all? { |t| t.hour == 0 }
|
90
|
+
week = day && times.map { |k| k.wday }.uniq.size == 1
|
91
|
+
month = day && times.all? { |k| k.day == 1 }
|
92
|
+
quarter = month && times.all? { |k| k.month % 3 == 1 }
|
93
|
+
year = quarter && times.all? { |k| k.month == 1 }
|
94
|
+
|
95
|
+
period =
|
96
|
+
if year
|
97
|
+
1
|
98
|
+
elsif quarter
|
99
|
+
4
|
100
|
+
elsif month
|
101
|
+
12
|
102
|
+
elsif week
|
103
|
+
52
|
104
|
+
elsif day
|
105
|
+
7
|
106
|
+
elsif hour
|
107
|
+
24 # or 24 * 7
|
108
|
+
elsif minute
|
109
|
+
60 # or 60 * 24
|
110
|
+
elsif second
|
111
|
+
60 # or 60 * 60
|
112
|
+
end
|
113
|
+
|
114
|
+
if series.size < period * 2
|
115
|
+
1
|
116
|
+
else
|
117
|
+
period
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
66
121
|
private
|
67
122
|
|
68
123
|
def iso8601(v)
|