anomaly_detection 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE.txt +1 -1
- data/README.md +2 -2
- data/ext/anomaly_detection/anomaly_detection.hpp +200 -2
- data/ext/anomaly_detection/dist.h +105 -49
- data/ext/anomaly_detection/ext.cpp +9 -3
- data/ext/anomaly_detection/stl.hpp +103 -50
- data/lib/anomaly_detection/version.rb +1 -1
- data/lib/anomaly_detection.rb +57 -2
- data/licenses/LICENSE-AnomalyDetection-cpp.txt +675 -0
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +15 -0
- metadata +6 -5
- data/ext/anomaly_detection/anomaly_detection.cpp +0 -139
@@ -0,0 +1,15 @@
|
|
1
|
+
Copyright (C) 2015 Twitter, Inc and other contributors
|
2
|
+
Copyright (C) 2022 Andrew Kane
|
3
|
+
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
5
|
+
it under the terms of the GNU General Public License as published by
|
6
|
+
the Free Software Foundation, either version 3 of the License, or
|
7
|
+
(at your option) any later version.
|
8
|
+
|
9
|
+
This program is distributed in the hope that it will be useful,
|
10
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
GNU General Public License for more details.
|
13
|
+
|
14
|
+
You should have received a copy of the GNU General Public License
|
15
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -35,7 +35,6 @@ files:
|
|
35
35
|
- LICENSE.txt
|
36
36
|
- NOTICE.txt
|
37
37
|
- README.md
|
38
|
-
- ext/anomaly_detection/anomaly_detection.cpp
|
39
38
|
- ext/anomaly_detection/anomaly_detection.hpp
|
40
39
|
- ext/anomaly_detection/dist.h
|
41
40
|
- ext/anomaly_detection/ext.cpp
|
@@ -43,8 +42,10 @@ files:
|
|
43
42
|
- ext/anomaly_detection/stl.hpp
|
44
43
|
- lib/anomaly_detection.rb
|
45
44
|
- lib/anomaly_detection/version.rb
|
45
|
+
- licenses/LICENSE-AnomalyDetection-cpp.txt
|
46
46
|
- licenses/LICENSE-MIT-dist-h.txt
|
47
47
|
- licenses/LICENSE-MIT-stl-cpp.txt
|
48
|
+
- licenses/NOTICE-AnomalyDetection-cpp.txt
|
48
49
|
- licenses/UNLICENSE-dist-h.txt
|
49
50
|
- licenses/UNLICENSE-stl-cpp.txt
|
50
51
|
homepage: https://github.com/ankane/AnomalyDetection.rb
|
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
59
60
|
requirements:
|
60
61
|
- - ">="
|
61
62
|
- !ruby/object:Gem::Version
|
62
|
-
version: '2.
|
63
|
+
version: '2.7'
|
63
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
65
|
requirements:
|
65
66
|
- - ">="
|
66
67
|
- !ruby/object:Gem::Version
|
67
68
|
version: '0'
|
68
69
|
requirements: []
|
69
|
-
rubygems_version: 3.
|
70
|
+
rubygems_version: 3.4.1
|
70
71
|
signing_key:
|
71
72
|
specification_version: 4
|
72
73
|
summary: Time series anomaly detection for Ruby
|
@@ -1,139 +0,0 @@
|
|
1
|
-
#include <functional>
|
2
|
-
#include <iostream>
|
3
|
-
#include <iterator>
|
4
|
-
#include <numeric>
|
5
|
-
#include <string>
|
6
|
-
#include <vector>
|
7
|
-
|
8
|
-
#include "anomaly_detection.hpp"
|
9
|
-
#include "dist.h"
|
10
|
-
#include "stl.hpp"
|
11
|
-
|
12
|
-
namespace anomaly_detection {
|
13
|
-
|
14
|
-
float median(const std::vector<float>& sorted) {
|
15
|
-
return (sorted[(sorted.size() - 1) / 2] + sorted[sorted.size() / 2]) / 2.0;
|
16
|
-
}
|
17
|
-
|
18
|
-
float mad(const std::vector<float>& data, float med) {
|
19
|
-
std::vector<float> res;
|
20
|
-
res.reserve(data.size());
|
21
|
-
for (auto v : data) {
|
22
|
-
res.push_back(fabs(v - med));
|
23
|
-
}
|
24
|
-
std::sort(res.begin(), res.end());
|
25
|
-
return 1.4826 * median(res);
|
26
|
-
}
|
27
|
-
|
28
|
-
std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
|
29
|
-
auto n = data.size();
|
30
|
-
|
31
|
-
// Check to make sure we have at least two periods worth of data for anomaly context
|
32
|
-
if (n < num_obs_per_period * 2) {
|
33
|
-
throw std::invalid_argument("series must contain at least 2 periods");
|
34
|
-
}
|
35
|
-
|
36
|
-
// Handle NANs
|
37
|
-
auto nan = std::count_if(data.begin(), data.end(), [](const auto& value) { return std::isnan(value); });
|
38
|
-
if (nan > 0) {
|
39
|
-
throw std::invalid_argument("series contains NANs");
|
40
|
-
}
|
41
|
-
|
42
|
-
// Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
43
|
-
auto seasonal_length = n * 10 + 1;
|
44
|
-
auto data_decomp = stl::params().robust(true).seasonal_length(seasonal_length).fit(data, num_obs_per_period);
|
45
|
-
|
46
|
-
auto seasonal = data_decomp.seasonal;
|
47
|
-
auto med = median(data);
|
48
|
-
std::vector<float> data2;
|
49
|
-
data2.reserve(n);
|
50
|
-
for (auto i = 0; i < n; i++) {
|
51
|
-
data2.push_back(data[i] - seasonal[i] - med);
|
52
|
-
}
|
53
|
-
|
54
|
-
std::vector<size_t> r_idx;
|
55
|
-
auto num_anoms = 0;
|
56
|
-
auto max_outliers = (size_t) n * k;
|
57
|
-
|
58
|
-
// Sort data for fast median
|
59
|
-
// Use stable sort for indexes for deterministic results
|
60
|
-
std::vector<size_t> indexes(n);
|
61
|
-
std::iota(indexes.begin(), indexes.end(), 0);
|
62
|
-
std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) { return data2[a] < data2[b]; });
|
63
|
-
std::sort(data2.begin(), data2.end());
|
64
|
-
|
65
|
-
// Compute test statistic until r=max_outliers values have been removed from the sample
|
66
|
-
for (auto i = 1; i <= max_outliers; i++) {
|
67
|
-
check_for_interrupts();
|
68
|
-
|
69
|
-
if (verbose) {
|
70
|
-
std::cout << i << " / " << max_outliers << " completed" << std::endl;
|
71
|
-
}
|
72
|
-
|
73
|
-
// TODO Improve performance between loop iterations
|
74
|
-
auto ma = median(data2);
|
75
|
-
std::vector<float> ares;
|
76
|
-
ares.reserve(data2.size());
|
77
|
-
if (one_tail) {
|
78
|
-
if (upper_tail) {
|
79
|
-
for (auto v : data2) {
|
80
|
-
ares.push_back(v - ma);
|
81
|
-
}
|
82
|
-
} else {
|
83
|
-
for (auto v : data2) {
|
84
|
-
ares.push_back(ma - v);
|
85
|
-
}
|
86
|
-
}
|
87
|
-
} else {
|
88
|
-
for (auto v : data2) {
|
89
|
-
ares.push_back(fabs(v - ma));
|
90
|
-
}
|
91
|
-
}
|
92
|
-
|
93
|
-
// Protect against constant time series
|
94
|
-
auto data_sigma = mad(data2, ma);
|
95
|
-
if (data_sigma == 0.0) {
|
96
|
-
break;
|
97
|
-
}
|
98
|
-
|
99
|
-
auto iter = std::max_element(ares.begin(), ares.end());
|
100
|
-
auto r_idx_i = std::distance(ares.begin(), iter);
|
101
|
-
|
102
|
-
// Only need to take sigma of r for performance
|
103
|
-
auto r = ares[r_idx_i] / data_sigma;
|
104
|
-
|
105
|
-
r_idx.push_back(indexes[r_idx_i]);
|
106
|
-
data2.erase(data2.begin() + r_idx_i);
|
107
|
-
indexes.erase(indexes.begin() + r_idx_i);
|
108
|
-
|
109
|
-
// Compute critical value
|
110
|
-
float p;
|
111
|
-
if (one_tail) {
|
112
|
-
p = 1.0 - alpha / (n - i + 1);
|
113
|
-
} else {
|
114
|
-
p = 1.0 - alpha / (2.0 * (n - i + 1));
|
115
|
-
}
|
116
|
-
|
117
|
-
auto t = students_t_ppf(p, n - i - 1);
|
118
|
-
auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
|
119
|
-
|
120
|
-
if (r > lam) {
|
121
|
-
num_anoms = i;
|
122
|
-
}
|
123
|
-
}
|
124
|
-
|
125
|
-
std::vector<size_t> anomalies(r_idx.begin(), r_idx.begin() + num_anoms);
|
126
|
-
|
127
|
-
// Sort like R version
|
128
|
-
std::sort(anomalies.begin(), anomalies.end());
|
129
|
-
|
130
|
-
return anomalies;
|
131
|
-
}
|
132
|
-
|
133
|
-
std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
|
134
|
-
bool one_tail = direction != Direction::Both;
|
135
|
-
bool upper_tail = direction == Direction::Positive;
|
136
|
-
return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
|
137
|
-
}
|
138
|
-
|
139
|
-
}
|