anomaly_detection 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ Copyright (C) 2015 Twitter, Inc and other contributors
2
+ Copyright (C) 2022 Andrew Kane
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-04 00:00:00.000000000 Z
11
+ date: 2023-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -35,7 +35,6 @@ files:
35
35
  - LICENSE.txt
36
36
  - NOTICE.txt
37
37
  - README.md
38
- - ext/anomaly_detection/anomaly_detection.cpp
39
38
  - ext/anomaly_detection/anomaly_detection.hpp
40
39
  - ext/anomaly_detection/dist.h
41
40
  - ext/anomaly_detection/ext.cpp
@@ -43,8 +42,10 @@ files:
43
42
  - ext/anomaly_detection/stl.hpp
44
43
  - lib/anomaly_detection.rb
45
44
  - lib/anomaly_detection/version.rb
45
+ - licenses/LICENSE-AnomalyDetection-cpp.txt
46
46
  - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
+ - licenses/NOTICE-AnomalyDetection-cpp.txt
48
49
  - licenses/UNLICENSE-dist-h.txt
49
50
  - licenses/UNLICENSE-stl-cpp.txt
50
51
  homepage: https://github.com/ankane/AnomalyDetection.rb
@@ -59,14 +60,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
59
60
  requirements:
60
61
  - - ">="
61
62
  - !ruby/object:Gem::Version
62
- version: '2.6'
63
+ version: '2.7'
63
64
  required_rubygems_version: !ruby/object:Gem::Requirement
64
65
  requirements:
65
66
  - - ">="
66
67
  - !ruby/object:Gem::Version
67
68
  version: '0'
68
69
  requirements: []
69
- rubygems_version: 3.2.32
70
+ rubygems_version: 3.4.1
70
71
  signing_key:
71
72
  specification_version: 4
72
73
  summary: Time series anomaly detection for Ruby
@@ -1,139 +0,0 @@
1
- #include <functional>
2
- #include <iostream>
3
- #include <iterator>
4
- #include <numeric>
5
- #include <string>
6
- #include <vector>
7
-
8
- #include "anomaly_detection.hpp"
9
- #include "dist.h"
10
- #include "stl.hpp"
11
-
12
- namespace anomaly_detection {
13
-
14
- float median(const std::vector<float>& sorted) {
15
- return (sorted[(sorted.size() - 1) / 2] + sorted[sorted.size() / 2]) / 2.0;
16
- }
17
-
18
- float mad(const std::vector<float>& data, float med) {
19
- std::vector<float> res;
20
- res.reserve(data.size());
21
- for (auto v : data) {
22
- res.push_back(fabs(v - med));
23
- }
24
- std::sort(res.begin(), res.end());
25
- return 1.4826 * median(res);
26
- }
27
-
28
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
29
- auto n = data.size();
30
-
31
- // Check to make sure we have at least two periods worth of data for anomaly context
32
- if (n < num_obs_per_period * 2) {
33
- throw std::invalid_argument("series must contain at least 2 periods");
34
- }
35
-
36
- // Handle NANs
37
- auto nan = std::count_if(data.begin(), data.end(), [](const auto& value) { return std::isnan(value); });
38
- if (nan > 0) {
39
- throw std::invalid_argument("series contains NANs");
40
- }
41
-
42
- // Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
43
- auto seasonal_length = n * 10 + 1;
44
- auto data_decomp = stl::params().robust(true).seasonal_length(seasonal_length).fit(data, num_obs_per_period);
45
-
46
- auto seasonal = data_decomp.seasonal;
47
- auto med = median(data);
48
- std::vector<float> data2;
49
- data2.reserve(n);
50
- for (auto i = 0; i < n; i++) {
51
- data2.push_back(data[i] - seasonal[i] - med);
52
- }
53
-
54
- std::vector<size_t> r_idx;
55
- auto num_anoms = 0;
56
- auto max_outliers = (size_t) n * k;
57
-
58
- // Sort data for fast median
59
- // Use stable sort for indexes for deterministic results
60
- std::vector<size_t> indexes(n);
61
- std::iota(indexes.begin(), indexes.end(), 0);
62
- std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) { return data2[a] < data2[b]; });
63
- std::sort(data2.begin(), data2.end());
64
-
65
- // Compute test statistic until r=max_outliers values have been removed from the sample
66
- for (auto i = 1; i <= max_outliers; i++) {
67
- check_for_interrupts();
68
-
69
- if (verbose) {
70
- std::cout << i << " / " << max_outliers << " completed" << std::endl;
71
- }
72
-
73
- // TODO Improve performance between loop iterations
74
- auto ma = median(data2);
75
- std::vector<float> ares;
76
- ares.reserve(data2.size());
77
- if (one_tail) {
78
- if (upper_tail) {
79
- for (auto v : data2) {
80
- ares.push_back(v - ma);
81
- }
82
- } else {
83
- for (auto v : data2) {
84
- ares.push_back(ma - v);
85
- }
86
- }
87
- } else {
88
- for (auto v : data2) {
89
- ares.push_back(fabs(v - ma));
90
- }
91
- }
92
-
93
- // Protect against constant time series
94
- auto data_sigma = mad(data2, ma);
95
- if (data_sigma == 0.0) {
96
- break;
97
- }
98
-
99
- auto iter = std::max_element(ares.begin(), ares.end());
100
- auto r_idx_i = std::distance(ares.begin(), iter);
101
-
102
- // Only need to take sigma of r for performance
103
- auto r = ares[r_idx_i] / data_sigma;
104
-
105
- r_idx.push_back(indexes[r_idx_i]);
106
- data2.erase(data2.begin() + r_idx_i);
107
- indexes.erase(indexes.begin() + r_idx_i);
108
-
109
- // Compute critical value
110
- float p;
111
- if (one_tail) {
112
- p = 1.0 - alpha / (n - i + 1);
113
- } else {
114
- p = 1.0 - alpha / (2.0 * (n - i + 1));
115
- }
116
-
117
- auto t = students_t_ppf(p, n - i - 1);
118
- auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
119
-
120
- if (r > lam) {
121
- num_anoms = i;
122
- }
123
- }
124
-
125
- std::vector<size_t> anomalies(r_idx.begin(), r_idx.begin() + num_anoms);
126
-
127
- // Sort like R version
128
- std::sort(anomalies.begin(), anomalies.end());
129
-
130
- return anomalies;
131
- }
132
-
133
- std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
134
- bool one_tail = direction != Direction::Both;
135
- bool upper_tail = direction == Direction::Positive;
136
- return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
137
- }
138
-
139
- }