anomaly_detection 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE.txt +1 -1
- data/README.md +2 -2
- data/ext/anomaly_detection/anomaly_detection.hpp +200 -2
- data/ext/anomaly_detection/dist.h +105 -49
- data/ext/anomaly_detection/ext.cpp +9 -3
- data/ext/anomaly_detection/stl.hpp +103 -50
- data/lib/anomaly_detection/version.rb +1 -1
- data/lib/anomaly_detection.rb +57 -2
- data/licenses/LICENSE-AnomalyDetection-cpp.txt +675 -0
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
- metadata +6 -5
- data/ext/anomaly_detection/anomaly_detection.cpp +0 -139
@@ -1,5 +1,5 @@
|
|
1
1
|
/*!
|
2
|
-
* STL C++ v0.1.
|
2
|
+
* STL C++ v0.1.2
|
3
3
|
* https://github.com/ankane/stl-cpp
|
4
4
|
* Unlicense OR MIT License
|
5
5
|
*
|
@@ -13,9 +13,10 @@
|
|
13
13
|
#pragma once
|
14
14
|
|
15
15
|
#include <algorithm>
|
16
|
-
#include <cassert>
|
17
16
|
#include <cmath>
|
17
|
+
#include <numeric>
|
18
18
|
#include <optional>
|
19
|
+
#include <stdexcept>
|
19
20
|
#include <vector>
|
20
21
|
|
21
22
|
namespace stl {
|
@@ -91,14 +92,14 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
91
92
|
return;
|
92
93
|
}
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
size_t nleft = 0;
|
96
|
+
size_t nright = 0;
|
96
97
|
|
97
98
|
auto newnj = std::min(njump, n - 1);
|
98
99
|
if (len >= n) {
|
99
100
|
nleft = 1;
|
100
101
|
nright = n;
|
101
|
-
for (
|
102
|
+
for (size_t i = 1; i <= n; i += newnj) {
|
102
103
|
auto ok = est(y, n, len, ideg, (float) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
103
104
|
if (!ok) {
|
104
105
|
ys[i - 1] = y[i - 1];
|
@@ -108,7 +109,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
108
109
|
auto nsh = (len + 1) / 2;
|
109
110
|
nleft = 1;
|
110
111
|
nright = len;
|
111
|
-
for (
|
112
|
+
for (size_t i = 1; i <= n; i++) { // fitted value at i
|
112
113
|
if (i > nsh && nright != n) {
|
113
114
|
nleft += 1;
|
114
115
|
nright += 1;
|
@@ -120,7 +121,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
120
121
|
}
|
121
122
|
} else { // newnj greater than one, len less than n
|
122
123
|
auto nsh = (len + 1) / 2;
|
123
|
-
for (
|
124
|
+
for (size_t i = 1; i <= n; i += newnj) { // fitted value at i
|
124
125
|
if (i < nsh) {
|
125
126
|
nleft = 1;
|
126
127
|
nright = len;
|
@@ -139,7 +140,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
139
140
|
}
|
140
141
|
|
141
142
|
if (newnj != 1) {
|
142
|
-
for (
|
143
|
+
for (size_t i = 1; i <= n - newnj; i += newnj) {
|
143
144
|
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((float) newnj);
|
144
145
|
for (auto j = i + 1; j <= i + newnj - 1; j++) {
|
145
146
|
ys[j - 1] = ys[i - 1] + delta * ((float) (j - i));
|
@@ -167,7 +168,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
167
168
|
auto v = 0.0;
|
168
169
|
|
169
170
|
// get the first average
|
170
|
-
for (
|
171
|
+
for (size_t i = 0; i < len; i++) {
|
171
172
|
v += x[i];
|
172
173
|
}
|
173
174
|
|
@@ -175,7 +176,7 @@ void ma(const float* x, size_t n, size_t len, float* ave) {
|
|
175
176
|
if (newn > 1) {
|
176
177
|
auto k = len;
|
177
178
|
auto m = 0;
|
178
|
-
for (
|
179
|
+
for (size_t j = 1; j < newn; j++) {
|
179
180
|
// window down the array
|
180
181
|
v = v - x[m] + x[k];
|
181
182
|
ave[j] = v / flen;
|
@@ -192,7 +193,7 @@ void fts(const float* x, size_t n, size_t np, float* trend, float* work) {
|
|
192
193
|
}
|
193
194
|
|
194
195
|
void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
195
|
-
for (
|
196
|
+
for (size_t i = 0; i < n; i++) {
|
196
197
|
rw[i] = fabs(y[i] - fit[i]);
|
197
198
|
}
|
198
199
|
|
@@ -206,7 +207,7 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
206
207
|
auto c9 = 0.999 * cmad;
|
207
208
|
auto c1 = 0.001 * cmad;
|
208
209
|
|
209
|
-
for (
|
210
|
+
for (size_t i = 0; i < n; i++) {
|
210
211
|
auto r = fabs(y[i] - fit[i]);
|
211
212
|
if (r <= c1) {
|
212
213
|
rw[i] = 1.0;
|
@@ -219,14 +220,14 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
219
220
|
}
|
220
221
|
|
221
222
|
void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, float* rw, float* season, float* work1, float* work2, float* work3, float* work4) {
|
222
|
-
for (
|
223
|
-
|
223
|
+
for (size_t j = 1; j <= np; j++) {
|
224
|
+
size_t k = (n - j) / np + 1;
|
224
225
|
|
225
|
-
for (
|
226
|
+
for (size_t i = 1; i <= k; i++) {
|
226
227
|
work1[i - 1] = y[(i - 1) * np + j - 1];
|
227
228
|
}
|
228
229
|
if (userw) {
|
229
|
-
for (
|
230
|
+
for (size_t i = 1; i <= k; i++) {
|
230
231
|
work3[i - 1] = rw[(i - 1) * np + j - 1];
|
231
232
|
}
|
232
233
|
}
|
@@ -243,25 +244,25 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
243
244
|
if (!ok) {
|
244
245
|
work2[k + 1] = work2[k];
|
245
246
|
}
|
246
|
-
for (
|
247
|
+
for (size_t m = 1; m <= k + 2; m++) {
|
247
248
|
season[(m - 1) * np + j - 1] = work2[m - 1];
|
248
249
|
}
|
249
250
|
}
|
250
251
|
}
|
251
252
|
|
252
253
|
void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, float* rw, float* season, float* trend, float* work1, float* work2, float* work3, float* work4, float* work5) {
|
253
|
-
for (
|
254
|
-
for (
|
254
|
+
for (size_t j = 0; j < ni; j++) {
|
255
|
+
for (size_t i = 0; i < n; i++) {
|
255
256
|
work1[i] = y[i] - trend[i];
|
256
257
|
}
|
257
258
|
|
258
259
|
ss(work1, n, np, ns, isdeg, nsjump, userw, rw, work2, work3, work4, work5, season);
|
259
260
|
fts(work2, n + 2 * np, np, work3, work1);
|
260
261
|
ess(work3, n, nl, ildeg, nljump, false, work4, work1, work5);
|
261
|
-
for (
|
262
|
+
for (size_t i = 0; i < n; i++) {
|
262
263
|
season[i] = work2[np + i] - work1[i];
|
263
264
|
}
|
264
|
-
for (
|
265
|
+
for (size_t i = 0; i < n; i++) {
|
265
266
|
work1[i] = y[i] - season[i];
|
266
267
|
}
|
267
268
|
ess(work1, n, nt, itdeg, ntjump, userw, rw, trend, work3);
|
@@ -269,6 +270,39 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
|
|
269
270
|
}
|
270
271
|
|
271
272
|
void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, float* rw, float* season, float* trend) {
|
273
|
+
if (ns < 3) {
|
274
|
+
throw std::invalid_argument("seasonal_length must be at least 3");
|
275
|
+
}
|
276
|
+
if (nt < 3) {
|
277
|
+
throw std::invalid_argument("trend_length must be at least 3");
|
278
|
+
}
|
279
|
+
if (nl < 3) {
|
280
|
+
throw std::invalid_argument("low_pass_length must be at least 3");
|
281
|
+
}
|
282
|
+
if (np < 2) {
|
283
|
+
throw std::invalid_argument("period must be at least 2");
|
284
|
+
}
|
285
|
+
|
286
|
+
if (isdeg != 0 && isdeg != 1) {
|
287
|
+
throw std::invalid_argument("seasonal_degree must be 0 or 1");
|
288
|
+
}
|
289
|
+
if (itdeg != 0 && itdeg != 1) {
|
290
|
+
throw std::invalid_argument("trend_degree must be 0 or 1");
|
291
|
+
}
|
292
|
+
if (ildeg != 0 && ildeg != 1) {
|
293
|
+
throw std::invalid_argument("low_pass_degree must be 0 or 1");
|
294
|
+
}
|
295
|
+
|
296
|
+
if (ns % 2 != 1) {
|
297
|
+
throw std::invalid_argument("seasonal_length must be odd");
|
298
|
+
}
|
299
|
+
if (nt % 2 != 1) {
|
300
|
+
throw std::invalid_argument("trend_length must be odd");
|
301
|
+
}
|
302
|
+
if (nl % 2 != 1) {
|
303
|
+
throw std::invalid_argument("low_pass_length must be odd");
|
304
|
+
}
|
305
|
+
|
272
306
|
auto work1 = std::vector<float>(n + 2 * np);
|
273
307
|
auto work2 = std::vector<float>(n + 2 * np);
|
274
308
|
auto work3 = std::vector<float>(n + 2 * np);
|
@@ -276,20 +310,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
276
310
|
auto work5 = std::vector<float>(n + 2 * np);
|
277
311
|
|
278
312
|
auto userw = false;
|
279
|
-
|
280
|
-
|
281
|
-
assert(ns >= 3);
|
282
|
-
assert(nt >= 3);
|
283
|
-
assert(nl >= 3);
|
284
|
-
assert(np >= 2);
|
285
|
-
|
286
|
-
assert(isdeg == 0 || isdeg == 1);
|
287
|
-
assert(itdeg == 0 || itdeg == 1);
|
288
|
-
assert(ildeg == 0 || ildeg == 1);
|
289
|
-
|
290
|
-
assert(ns % 2 == 1);
|
291
|
-
assert(nt % 2 == 1);
|
292
|
-
assert(nl % 2 == 1);
|
313
|
+
size_t k = 0;
|
293
314
|
|
294
315
|
while (true) {
|
295
316
|
onestp(y, n, np, ns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, userw, rw, season, trend, work1.data(), work2.data(), work3.data(), work4.data(), work5.data());
|
@@ -297,7 +318,7 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
297
318
|
if (k > no) {
|
298
319
|
break;
|
299
320
|
}
|
300
|
-
for (
|
321
|
+
for (size_t i = 0; i < n; i++) {
|
301
322
|
work1[i] = trend[i] + season[i];
|
302
323
|
}
|
303
324
|
rwts(y, n, work1.data(), rw);
|
@@ -305,18 +326,46 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
305
326
|
}
|
306
327
|
|
307
328
|
if (no <= 0) {
|
308
|
-
for (
|
329
|
+
for (size_t i = 0; i < n; i++) {
|
309
330
|
rw[i] = 1.0;
|
310
331
|
}
|
311
332
|
}
|
312
333
|
}
|
313
334
|
|
335
|
+
// Returns the sample variance of `series`, using an n - 1 denominator.
//
// The mean and the sum of squared deviations are accumulated in double
// precision in a single pass, without a temporary buffer. The result is
// narrowed to float on return.
//
// No size check is performed: under IEEE-754 arithmetic a one-element
// series yields NaN (0 / 0), and an empty series yields 0 because the
// unsigned `count - 1` wraps to a very large divisor.
float var(const std::vector<float>& series) {
    const auto count = series.size();
    const double mean = std::accumulate(series.begin(), series.end(), 0.0) / count;

    double sum_sq = 0.0;
    for (const float v : series) {
        const double dev = v - mean;
        sum_sq += dev * dev;
    }

    return static_cast<float>(sum_sq / (count - 1));
}
|
344
|
+
|
314
345
|
class StlResult {
|
315
346
|
public:
|
316
347
|
std::vector<float> seasonal;
|
317
348
|
std::vector<float> trend;
|
318
349
|
std::vector<float> remainder;
|
319
350
|
std::vector<float> weights;
|
351
|
+
|
352
|
+
inline float seasonal_strength() {
|
353
|
+
std::vector<float> sr;
|
354
|
+
sr.reserve(remainder.size());
|
355
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
356
|
+
sr.push_back(seasonal[i] + remainder[i]);
|
357
|
+
}
|
358
|
+
return std::max(0.0, 1.0 - var(remainder) / var(sr));
|
359
|
+
}
|
360
|
+
|
361
|
+
inline float trend_strength() {
|
362
|
+
std::vector<float> tr;
|
363
|
+
tr.reserve(remainder.size());
|
364
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
365
|
+
tr.push_back(trend[i] + remainder[i]);
|
366
|
+
}
|
367
|
+
return std::max(0.0, 1.0 - var(remainder) / var(tr));
|
368
|
+
}
|
320
369
|
};
|
321
370
|
|
322
371
|
class StlParams {
|
@@ -337,62 +386,62 @@ public:
|
|
337
386
|
// Stores `ns` as the seasonal length and returns a copy of the updated
// params so calls can be chained.
inline StlParams seasonal_length(size_t ns) {
    ns_ = ns;
    return *this;
}
|
341
390
|
|
342
391
|
// Stores `nt` as the trend length and returns a copy of the updated
// params so calls can be chained.
inline StlParams trend_length(size_t nt) {
    nt_ = nt;
    return *this;
}
|
346
395
|
|
347
396
|
// Stores `nl` as the low-pass length and returns a copy of the updated
// params so calls can be chained.
inline StlParams low_pass_length(size_t nl) {
    nl_ = nl;
    return *this;
}
|
351
400
|
|
352
401
|
// Stores `isdeg` as the seasonal degree and returns a copy of the updated
// params so calls can be chained.
inline StlParams seasonal_degree(int isdeg) {
    isdeg_ = isdeg;
    return *this;
}
|
356
405
|
|
357
406
|
// Stores `itdeg` as the trend degree and returns a copy of the updated
// params so calls can be chained.
inline StlParams trend_degree(int itdeg) {
    itdeg_ = itdeg;
    return *this;
}
|
361
410
|
|
362
411
|
// Stores `ildeg` as the low-pass degree and returns a copy of the updated
// params so calls can be chained.
inline StlParams low_pass_degree(int ildeg) {
    ildeg_ = ildeg;
    return *this;
}
|
366
415
|
|
367
416
|
// Stores `nsjump` as the seasonal jump and returns a copy of the updated
// params so calls can be chained.
inline StlParams seasonal_jump(size_t nsjump) {
    nsjump_ = nsjump;
    return *this;
}
|
371
420
|
|
372
421
|
// Stores `ntjump` as the trend jump and returns a copy of the updated
// params so calls can be chained.
inline StlParams trend_jump(size_t ntjump) {
    ntjump_ = ntjump;
    return *this;
}
|
376
425
|
|
377
426
|
// Stores `nljump` as the low-pass jump and returns a copy of the updated
// params so calls can be chained.
inline StlParams low_pass_jump(size_t nljump) {
    nljump_ = nljump;
    return *this;
}
|
381
430
|
|
382
431
|
// Stores the number of inner loops and returns a copy of the updated params
// so calls can be chained.
//
// The parameter is a count, not a flag: stl() takes `ni` as size_t. It was
// previously declared `bool`, which collapsed every value above 1 to 1.
// Callers that passed a bool still get 0 or 1.
// NOTE(review): assumes `ni_` can hold a size_t; its declaration is not
// visible here, so confirm.
inline StlParams inner_loops(size_t ni) {
    this->ni_ = ni;
    return *this;
}
|
386
435
|
|
387
436
|
// Stores the number of outer loops and returns a copy of the updated params
// so calls can be chained.
//
// The parameter is a count, not a flag: stl() takes `no` as size_t. It was
// previously declared `bool`, which collapsed every value above 1 to 1.
// Callers that passed a bool still get 0 or 1.
// NOTE(review): assumes `no_` can hold a size_t; its declaration is not
// visible here, so confirm.
inline StlParams outer_loops(size_t no) {
    this->no_ = no;
    return *this;
}
|
391
440
|
|
392
441
|
// Stores the `robust` flag and returns a copy of the updated params so
// calls can be chained.
inline StlParams robust(bool robust) {
    robust_ = robust;
    return *this;
}
|
396
445
|
|
397
446
|
StlResult fit(const float* y, size_t n, size_t np);
|
398
447
|
StlResult fit(const std::vector<float>& y, size_t np);
|
@@ -403,6 +452,10 @@ StlParams params() {
|
|
403
452
|
}
|
404
453
|
|
405
454
|
StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
455
|
+
if (n < 2 * np) {
|
456
|
+
throw std::invalid_argument("series has less than two periods");
|
457
|
+
}
|
458
|
+
|
406
459
|
auto ns = this->ns_.value_or(np);
|
407
460
|
|
408
461
|
auto isdeg = this->isdeg_;
|
@@ -444,7 +497,7 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
444
497
|
stl(y, n, newnp, newns, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, res.weights.data(), res.seasonal.data(), res.trend.data());
|
445
498
|
|
446
499
|
res.remainder.reserve(n);
|
447
|
-
for (
|
500
|
+
for (size_t i = 0; i < n; i++) {
|
448
501
|
res.remainder.push_back(y[i] - res.seasonal[i] - res.trend[i]);
|
449
502
|
}
|
450
503
|
|
data/lib/anomaly_detection.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# extensions
|
2
|
-
|
2
|
+
require_relative "anomaly_detection/ext"
|
3
3
|
|
4
4
|
# modules
|
5
|
-
|
5
|
+
require_relative "anomaly_detection/version"
|
6
6
|
|
7
7
|
module AnomalyDetection
|
8
8
|
class << self
|
9
9
|
def detect(series, period:, max_anoms: 0.1, alpha: 0.05, direction: "both", plot: false, verbose: false)
|
10
|
+
if period == :auto
|
11
|
+
period = determine_period(series)
|
12
|
+
puts "Set period to #{period}" if verbose
|
13
|
+
elsif period.nil?
|
14
|
+
period = 1
|
15
|
+
end
|
16
|
+
|
10
17
|
raise ArgumentError, "series must contain at least 2 periods" if series.size < period * 2
|
11
18
|
|
12
19
|
if series.is_a?(Hash)
|
@@ -16,6 +23,9 @@ module AnomalyDetection
|
|
16
23
|
x = series
|
17
24
|
end
|
18
25
|
|
26
|
+
# flush Ruby output since std::endl flushes C++ output
|
27
|
+
$stdout.flush if verbose
|
28
|
+
|
19
29
|
res = _detect(x, period, max_anoms, alpha, direction, verbose)
|
20
30
|
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
21
31
|
res
|
@@ -63,6 +73,51 @@ module AnomalyDetection
|
|
63
73
|
.config(axis: {title: nil, labelFontSize: 12})
|
64
74
|
end
|
65
75
|
|
76
|
+
# determine period based on time keys (experimental)
|
77
|
+
# in future, could use an approach that looks at values
|
78
|
+
# like https://stats.stackexchange.com/a/1214
|
79
|
+
# Infers a seasonal period from the time keys of +series+.
#
# Each key is converted with +to_time+, and the coarsest granularity that
# every key aligns to selects the period: yearly => 1, quarterly => 4,
# monthly => 12, weekly => 52, daily => 7, hourly => 24, minutely => 60,
# secondly => 60.
#
# Returns 1 when no granularity matches (keys with sub-second precision)
# or when the series holds fewer than two of the chosen periods.
#
# Raises ArgumentError unless +series+ is a Hash.
def determine_period(series)
  unless series.is_a?(Hash)
    raise ArgumentError, "series must be a hash for :auto period"
  end

  times = series.keys.map(&:to_time)

  # each flag builds on the previous, finer-grained one
  second = times.all? { |t| t.nsec == 0 }
  minute = second && times.all? { |t| t.sec == 0 }
  hour = minute && times.all? { |t| t.min == 0 }
  day = hour && times.all? { |t| t.hour == 0 }
  week = day && times.map { |k| k.wday }.uniq.size == 1
  month = day && times.all? { |k| k.day == 1 }
  quarter = month && times.all? { |k| k.month % 3 == 1 }
  year = quarter && times.all? { |k| k.month == 1 }

  period =
    if year
      1
    elsif quarter
      4
    elsif month
      12
    elsif week
      52
    elsif day
      7
    elsif hour
      24 # or 24 * 7
    elsif minute
      60 # or 60 * 24
    elsif second
      60 # or 60 * 60
    else
      # no granularity matched; without this branch period would be nil
      # and the size check below would raise NoMethodError
      1
    end

  # fall back to no seasonality when there are fewer than two full periods
  series.size < period * 2 ? 1 : period
end
|
120
|
+
|
66
121
|
private
|
67
122
|
|
68
123
|
def iso8601(v)
|