anomaly_detection 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ Copyright (C) 2015 Twitter, Inc and other contributors
2
+ Copyright (C) 2022 Andrew Kane
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-04 00:00:00.000000000 Z
11
+ date: 2023-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -35,7 +35,6 @@ files:
35
35
  - LICENSE.txt
36
36
  - NOTICE.txt
37
37
  - README.md
38
- - ext/anomaly_detection/anomaly_detection.cpp
39
38
  - ext/anomaly_detection/anomaly_detection.hpp
40
39
  - ext/anomaly_detection/dist.h
41
40
  - ext/anomaly_detection/ext.cpp
@@ -43,8 +42,10 @@ files:
43
42
  - ext/anomaly_detection/stl.hpp
44
43
  - lib/anomaly_detection.rb
45
44
  - lib/anomaly_detection/version.rb
45
+ - licenses/LICENSE-AnomalyDetection-cpp.txt
46
46
  - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
+ - licenses/NOTICE-AnomalyDetection-cpp.txt
48
49
  - licenses/UNLICENSE-dist-h.txt
49
50
  - licenses/UNLICENSE-stl-cpp.txt
50
51
  homepage: https://github.com/ankane/AnomalyDetection.rb
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
59
60
  requirements:
60
61
  - - ">="
61
62
  - !ruby/object:Gem::Version
62
- version: '2.6'
63
+ version: '2.7'
63
64
  required_rubygems_version: !ruby/object:Gem::Requirement
64
65
  requirements:
65
66
  - - ">="
66
67
  - !ruby/object:Gem::Version
67
68
  version: '0'
68
69
  requirements: []
69
- rubygems_version: 3.2.32
70
+ rubygems_version: 3.4.1
70
71
  signing_key:
71
72
  specification_version: 4
72
73
  summary: Time series anomaly detection for Ruby
@@ -1,139 +0,0 @@
1
- #include <functional>
2
- #include <iostream>
3
- #include <iterator>
4
- #include <numeric>
5
- #include <string>
6
- #include <vector>
7
-
8
- #include "anomaly_detection.hpp"
9
- #include "dist.h"
10
- #include "stl.hpp"
11
-
12
- namespace anomaly_detection {
13
-
14
- float median(const std::vector<float>& sorted) {
15
- return (sorted[(sorted.size() - 1) / 2] + sorted[sorted.size() / 2]) / 2.0;
16
- }
17
-
18
- float mad(const std::vector<float>& data, float med) {
19
- std::vector<float> res;
20
- res.reserve(data.size());
21
- for (auto v : data) {
22
- res.push_back(fabs(v - med));
23
- }
24
- std::sort(res.begin(), res.end());
25
- return 1.4826 * median(res);
26
- }
27
-
28
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
29
- auto n = data.size();
30
-
31
- // Check to make sure we have at least two periods worth of data for anomaly context
32
- if (n < num_obs_per_period * 2) {
33
- throw std::invalid_argument("series must contain at least 2 periods");
34
- }
35
-
36
- // Handle NANs
37
- auto nan = std::count_if(data.begin(), data.end(), [](const auto& value) { return std::isnan(value); });
38
- if (nan > 0) {
39
- throw std::invalid_argument("series contains NANs");
40
- }
41
-
42
- // Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
43
- auto seasonal_length = n * 10 + 1;
44
- auto data_decomp = stl::params().robust(true).seasonal_length(seasonal_length).fit(data, num_obs_per_period);
45
-
46
- auto seasonal = data_decomp.seasonal;
47
- auto med = median(data);
48
- std::vector<float> data2;
49
- data2.reserve(n);
50
- for (auto i = 0; i < n; i++) {
51
- data2.push_back(data[i] - seasonal[i] - med);
52
- }
53
-
54
- std::vector<size_t> r_idx;
55
- auto num_anoms = 0;
56
- auto max_outliers = (size_t) n * k;
57
-
58
- // Sort data for fast median
59
- // Use stable sort for indexes for deterministic results
60
- std::vector<size_t> indexes(n);
61
- std::iota(indexes.begin(), indexes.end(), 0);
62
- std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) { return data2[a] < data2[b]; });
63
- std::sort(data2.begin(), data2.end());
64
-
65
- // Compute test statistic until r=max_outliers values have been removed from the sample
66
- for (auto i = 1; i <= max_outliers; i++) {
67
- check_for_interrupts();
68
-
69
- if (verbose) {
70
- std::cout << i << " / " << max_outliers << " completed" << std::endl;
71
- }
72
-
73
- // TODO Improve performance between loop iterations
74
- auto ma = median(data2);
75
- std::vector<float> ares;
76
- ares.reserve(data2.size());
77
- if (one_tail) {
78
- if (upper_tail) {
79
- for (auto v : data2) {
80
- ares.push_back(v - ma);
81
- }
82
- } else {
83
- for (auto v : data2) {
84
- ares.push_back(ma - v);
85
- }
86
- }
87
- } else {
88
- for (auto v : data2) {
89
- ares.push_back(fabs(v - ma));
90
- }
91
- }
92
-
93
- // Protect against constant time series
94
- auto data_sigma = mad(data2, ma);
95
- if (data_sigma == 0.0) {
96
- break;
97
- }
98
-
99
- auto iter = std::max_element(ares.begin(), ares.end());
100
- auto r_idx_i = std::distance(ares.begin(), iter);
101
-
102
- // Only need to take sigma of r for performance
103
- auto r = ares[r_idx_i] / data_sigma;
104
-
105
- r_idx.push_back(indexes[r_idx_i]);
106
- data2.erase(data2.begin() + r_idx_i);
107
- indexes.erase(indexes.begin() + r_idx_i);
108
-
109
- // Compute critical value
110
- float p;
111
- if (one_tail) {
112
- p = 1.0 - alpha / (n - i + 1);
113
- } else {
114
- p = 1.0 - alpha / (2.0 * (n - i + 1));
115
- }
116
-
117
- auto t = students_t_ppf(p, n - i - 1);
118
- auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
119
-
120
- if (r > lam) {
121
- num_anoms = i;
122
- }
123
- }
124
-
125
- std::vector<size_t> anomalies(r_idx.begin(), r_idx.begin() + num_anoms);
126
-
127
- // Sort like R version
128
- std::sort(anomalies.begin(), anomalies.end());
129
-
130
- return anomalies;
131
- }
132
-
133
- std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
134
- bool one_tail = direction != Direction::Both;
135
- bool upper_tail = direction == Direction::Positive;
136
- return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
137
- }
138
-
139
- }