anomaly_detection 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 94e6edb7ef4ce6db5fdb7b01ab40322bc8daceb222c0388abe19704ef2ca7f99
4
- data.tar.gz: 41e4c55f5f42251a25fc337941ee85ba306df15ca925ebfe6d1d59129cc650cc
3
+ metadata.gz: cfb709e43863f4221a67e8f675f28b5361f6bf33a0d0f6fa4f52cdc0cad01796
4
+ data.tar.gz: 40965f08bb75cdb673d43e42c7fc47403fbfb1b082de10cab86bac569916d5b8
5
5
  SHA512:
6
- metadata.gz: a534a1903b14e7e3287b86b8da3936ee889f0e93690a55f5cbba5a384dfd06baab1ded6afb71acec6eb2461c0ac0d6645d28a5093ef33a4c28dce0319ef9ae75
7
- data.tar.gz: 90fcd64ce191e8aaff1a46384e003f8eeea7b6e9732357fdd7726066bf4e3ad1a8fe91043042716f57e931b05c048d984c60178c05e38d127d21216fe5a2ee2f
6
+ metadata.gz: '0496d044ecbe143be64164bd88092c5a5bd660f5fec6425e9f6ea0759f2ab7dbaa41a055c9d03a78c8e5a661b86bb25629c5e7a809614ba415d941f4d63fc9cb'
7
+ data.tar.gz: 8cc3f28c981d0be5cdb3dbfd054910d45469d258a662e163d7b01379af93e6714874d043ff1521ae19506f742b63eb93bf631f6f895c11842c4c15028c66a4b8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.3 (2022-01-03)
2
+
3
+ - Switched to dist.h
4
+
1
5
  ## 0.1.2 (2021-10-20)
2
6
 
3
7
  - Added `plot` method
data/README.md CHANGED
@@ -58,7 +58,7 @@ AnomalyDetection.detect(
58
58
  )
59
59
  ```
60
60
 
61
- ## Plotting [unreleased]
61
+ ## Plotting
62
62
 
63
63
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
64
64
 
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
74
74
 
75
75
  ## Credits
76
76
 
77
- This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [cdflib](https://people.sc.fsu.edu/~jburkardt/cpp_src/cdflib/cdflib.html) for the quantile function.
77
+ This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist.h](https://github.com/ankane/dist.h) for the quantile function.
78
78
 
79
79
  ## References
80
80
 
@@ -6,7 +6,7 @@
6
6
  #include <vector>
7
7
 
8
8
  #include "anomaly_detection.hpp"
9
- #include "cdflib.hpp"
9
+ #include "dist.h"
10
10
  #include "stl.hpp"
11
11
 
12
12
  namespace anomaly_detection {
@@ -25,21 +25,7 @@ float mad(const std::vector<float>& data, float med) {
25
25
  return 1.4826 * median(res);
26
26
  }
27
27
 
28
- float qt(double p, double df) {
29
- int which = 2;
30
- double q = 1 - p;
31
- double t;
32
- int status;
33
- double bound;
34
- cdft(&which, &p, &q, &t, &df, &status, &bound);
35
-
36
- if (status != 0) {
37
- throw std::invalid_argument("Bad status");
38
- }
39
- return t;
40
- }
41
-
42
- std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> interrupt) {
28
+ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> check_for_interrupts) {
43
29
  auto n = data.size();
44
30
 
45
31
  // Check to make sure we have at least two periods worth of data for anomaly context
@@ -78,8 +64,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
78
64
 
79
65
  // Compute test statistic until r=max_outliers values have been removed from the sample
80
66
  for (auto i = 1; i <= max_outliers; i++) {
81
- // Check for interrupts
82
- interrupt();
67
+ check_for_interrupts();
83
68
 
84
69
  if (verbose) {
85
70
  std::cout << i << " / " << max_outliers << " completed" << std::endl;
@@ -129,7 +114,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
129
114
  p = 1.0 - alpha / (2.0 * (n - i + 1));
130
115
  }
131
116
 
132
- auto t = qt(p, n - i - 1);
117
+ auto t = students_t_ppf(p, n - i - 1);
133
118
  auto lam = t * (n - i) / sqrt(((n - i - 1) + powf(t, 2.0)) * (n - i + 1));
134
119
 
135
120
  if (r > lam) {
@@ -145,10 +130,10 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, int num_obs_per
145
130
  return anomalies;
146
131
  }
147
132
 
148
- std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> interrupt) {
133
+ std::vector<size_t> anomalies(const std::vector<float>& x, int period, float k, float alpha, Direction direction, bool verbose, std::function<void()> check_for_interrupts) {
149
134
  bool one_tail = direction != Direction::Both;
150
135
  bool upper_tail = direction == Direction::Positive;
151
- return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, interrupt);
136
+ return detect_anoms(x, period, k, alpha, one_tail, upper_tail, verbose, check_for_interrupts);
152
137
  }
153
138
 
154
139
  }
@@ -0,0 +1,180 @@
1
+ /*!
2
+ * dist.h v0.1.0
3
+ * https://github.com/ankane/dist.h
4
+ * Unlicense OR MIT License
5
+ */
6
+
7
+ #pragma once
8
+
9
+ #define _USE_MATH_DEFINES
10
+
11
+ #include <assert.h>
12
+ #include <math.h>
13
+
14
+ // Winitzki, S. (2008).
15
+ // A handy approximation for the error function and its inverse.
16
+ // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
17
+ // from https://sites.google.com/site/winitzki
18
+ double erf(double x) {
19
+ double sign = x < 0 ? -1.0 : 1.0;
20
+ x = x < 0 ? -x : x;
21
+
22
+ double a = 0.14;
23
+ double x2 = x * x;
24
+ return sign * sqrt(1.0 - exp(-x2 * (4.0 / M_PI + a * x2) / (1.0 + a * x2)));
25
+ }
26
+
27
+ // Winitzki, S. (2008).
28
+ // A handy approximation for the error function and its inverse.
29
+ // https://drive.google.com/file/d/0B2Mt7luZYBrwZlctV3A3eF82VGM/view?resourcekey=0-UQpPhwZgzP0sF4LHBDlLtg
30
+ // from https://sites.google.com/site/winitzki
31
+ double inverse_erf(double x) {
32
+ double sign = x < 0 ? -1.0 : 1.0;
33
+ x = x < 0 ? -x : x;
34
+
35
+ double a = 0.147;
36
+ double ln = log(1.0 - x * x);
37
+ double f1 = 2.0 / (M_PI * a);
38
+ double f2 = ln / 2.0;
39
+ double f3 = f1 + f2;
40
+ double f4 = 1.0 / a * ln;
41
+ return sign * sqrt(-f1 - f2 + sqrt(f3 * f3 - f4));
42
+ }
43
+
44
+ double normal_pdf(double x, double mean, double std_dev) {
45
+ double var = std_dev * std_dev;
46
+ return (1.0 / (var * sqrt(2.0 * M_PI))) * pow(M_E, -0.5 * pow((x - mean) / var, 2));
47
+ }
48
+
49
+ double normal_cdf(double x, double mean, double std_dev) {
50
+ return 0.5 * (1.0 + erf((x - mean) / (std_dev * std_dev * sqrt(2))));
51
+ }
52
+
53
+ double normal_ppf(double p, double mean, double std_dev) {
54
+ assert(p >= 0 && p <= 1);
55
+
56
+ return mean + (std_dev * std_dev) * sqrt(2) * inverse_erf(2.0 * p - 1.0);
57
+ }
58
+
59
+ double students_t_pdf(double x, unsigned int n) {
60
+ assert(n >= 1);
61
+
62
+ return tgamma((n + 1.0) / 2.0) / (sqrt(n * M_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
63
+ }
64
+
65
+ // Hill, G. W. (1970).
66
+ // Algorithm 395: Student's t-distribution.
67
+ // Communications of the ACM, 13(10), 617-619.
68
+ double students_t_cdf(double x, unsigned int n) {
69
+ assert(n >= 1);
70
+
71
+ double start = x < 0 ? 0 : 1;
72
+ double sign = x < 0 ? 1 : -1;
73
+
74
+ double z = 1.0;
75
+ double t = x * x;
76
+ double y = t / n;
77
+ double b = 1.0 + y;
78
+
79
+ if ((n >= 20 && t < n) || n > 200) {
80
+ // asymptotic series for large or noninteger n
81
+ if (y > 10e-6) {
82
+ y = log(b);
83
+ }
84
+ double a = n - 0.5;
85
+ b = 48.0 * a * a;
86
+ y = a * y;
87
+ y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) / (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) * sqrt(y);
88
+ return start + sign * normal_cdf(-y, 0.0, 1.0);
89
+ }
90
+
91
+ if (n < 20 && t < 4.0) {
92
+ // nested summation of cosine series
93
+ y = sqrt(y);
94
+ double a = y;
95
+ if (n == 1) {
96
+ a = 0.0;
97
+ }
98
+
99
+ // loop
100
+ if (n > 1) {
101
+ n -= 2;
102
+ while (n > 1) {
103
+ a = (n - 1) / (b * n) * a + y;
104
+ n -= 2;
105
+ }
106
+ }
107
+ a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
108
+ return start + sign * (z - a) / 2;
109
+ }
110
+
111
+ // tail series expanation for large t-values
112
+ double a = sqrt(b);
113
+ y = a * n;
114
+ int j = 0;
115
+ while (a != z) {
116
+ j += 2;
117
+ z = a;
118
+ y = y * (j - 1) / (b * j);
119
+ a = a + y / (n + j);
120
+ }
121
+ z = 0.0;
122
+ y = 0.0;
123
+ a = -a;
124
+
125
+ // loop (without n + 2 and n - 2)
126
+ while (n > 1) {
127
+ a = (n - 1) / (b * n) * a + y;
128
+ n -= 2;
129
+ }
130
+ a = n == 0 ? a / sqrt(b) : (atan(y) + a / b) * (2.0 / M_PI);
131
+ return start + sign * (z - a) / 2;
132
+ }
133
+
134
+ // Hill, G. W. (1970).
135
+ // Algorithm 396: Student's t-quantiles.
136
+ // Communications of the ACM, 13(10), 619-620.
137
+ double students_t_ppf(double p, unsigned int n) {
138
+ assert(p >= 0 && p <= 1);
139
+ assert(n >= 1);
140
+
141
+ // distribution is symmetric
142
+ double sign = p < 0.5 ? -1 : 1;
143
+ p = p < 0.5 ? 1 - p : p;
144
+
145
+ // two-tail to one-tail
146
+ p = 2.0 * (1.0 - p);
147
+
148
+ if (n == 2) {
149
+ return sign * sqrt(2.0 / (p * (2.0 - p)) - 2.0);
150
+ }
151
+
152
+ double half_pi = M_PI / 2.0;
153
+
154
+ if (n == 1) {
155
+ p = p * half_pi;
156
+ return sign * cos(p) / sin(p);
157
+ }
158
+
159
+ double a = 1.0 / (n - 0.5);
160
+ double b = 48.0 / (a * a);
161
+ double c = ((20700.0 * a / b - 98.0) * a - 16.0) * a + 96.36;
162
+ double d = ((94.5 / (b + c) - 3.0) / b + 1.0) * sqrt(a * half_pi) * n;
163
+ double x = d * p;
164
+ double y = pow(x, 2.0 / n);
165
+ if (y > 0.05 + a) {
166
+ // asymptotic inverse expansion about normal
167
+ x = normal_ppf(p * 0.5, 0.0, 1.0);
168
+ y = x * x;
169
+ if (n < 5) {
170
+ c += 0.3 * (n - 4.5) * (x + 0.6);
171
+ }
172
+ c = (((0.05 * d * x - 5.0) * x - 7.0) * x - 2.0) * x + b + c;
173
+ y = (((((0.4 * y + 6.3) * y + 36.0) * y + 94.5) / c - y - 3.0) / b + 1.0) * x;
174
+ y = a * y * y;
175
+ y = y > 0.002 ? exp(y) - 1.0 : 0.5 * y * y + y;
176
+ } else {
177
+ y = ((1.0 / (((n + 6.0) / (n * y) - 0.089 * d - 0.822) * (n + 2.0) * 3.0) + 0.5 / (n + 4.0)) * y - 1.0) * (n + 1.0) / (n + 2.0) + 1.0 / y;
178
+ }
179
+ return sign * sqrt(n * y);
180
+ }
@@ -1,3 +1,3 @@
1
1
  module AnomalyDetection
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-20 00:00:00.000000000 Z
11
+ date: 2022-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -37,15 +37,15 @@ files:
37
37
  - README.md
38
38
  - ext/anomaly_detection/anomaly_detection.cpp
39
39
  - ext/anomaly_detection/anomaly_detection.hpp
40
- - ext/anomaly_detection/cdflib.cpp
41
- - ext/anomaly_detection/cdflib.hpp
40
+ - ext/anomaly_detection/dist.h
42
41
  - ext/anomaly_detection/ext.cpp
43
42
  - ext/anomaly_detection/extconf.rb
44
43
  - ext/anomaly_detection/stl.hpp
45
44
  - lib/anomaly_detection.rb
46
45
  - lib/anomaly_detection/version.rb
46
+ - licenses/LICENSE-MIT-dist-h.txt
47
47
  - licenses/LICENSE-MIT-stl-cpp.txt
48
- - licenses/LICENSE-cdflib.txt
48
+ - licenses/UNLICENSE-dist-h.txt
49
49
  - licenses/UNLICENSE-stl-cpp.txt
50
50
  homepage: https://github.com/ankane/AnomalyDetection.rb
51
51
  licenses:
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.2.22
69
+ rubygems_version: 3.2.32
70
70
  signing_key:
71
71
  specification_version: 4
72
72
  summary: Time series anomaly detection for Ruby