anomaly_detection 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/NOTICE.txt +1 -1
- data/README.md +4 -4
- data/ext/anomaly_detection/anomaly_detection.hpp +158 -101
- data/ext/anomaly_detection/dist.h +31 -9
- data/ext/anomaly_detection/ext.cpp +24 -14
- data/ext/anomaly_detection/extconf.rb +1 -1
- data/ext/anomaly_detection/stl.hpp +638 -247
- data/lib/anomaly_detection/version.rb +1 -1
- data/licenses/LICENSE-MIT-dist-h.txt +1 -1
- data/licenses/LICENSE-MIT-stl-cpp.txt +1 -1
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +1 -1
- metadata +6 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b7e239fb8a7cef345a877f6f9ad0d980f820f35a89b66248d48c6350a92ce750
|
|
4
|
+
data.tar.gz: 617ce11f0df37dc0033b120ec77a99d719bb9ba3254097cbfb38e420b6a8398f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bde266b02df66b998313177272f2d24d749442a96f7610ea57c70e057e3aaa723248fcee8e487839f15ba078bd3ece933ecf00b803f5790b4799a9dbd909045b
|
|
7
|
+
data.tar.gz: 56873cb20e990426e03871320b4217a50d6f2ff446ac0e848f627c9aa2a8ab2e8b0f8ee6c7084f08c5795930250232e4ff0282b60a9c200fb4c5a099fc6a6033
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
Copyright (C) 2015 Twitter, Inc and other contributors
|
|
2
|
-
Copyright (C) 2021-
|
|
2
|
+
Copyright (C) 2021-2026 Andrew Kane
|
|
3
3
|
|
|
4
4
|
This program is free software: you can redistribute it and/or modify
|
|
5
5
|
it under the terms of the GNU General Public License as published by
|
data/README.md
CHANGED
|
@@ -20,9 +20,9 @@ Detect anomalies in a time series
|
|
|
20
20
|
|
|
21
21
|
```ruby
|
|
22
22
|
series = {
|
|
23
|
-
Date.parse("
|
|
24
|
-
Date.parse("
|
|
25
|
-
Date.parse("
|
|
23
|
+
Date.parse("2025-01-01") => 100,
|
|
24
|
+
Date.parse("2025-01-02") => 150,
|
|
25
|
+
Date.parse("2025-01-03") => 136,
|
|
26
26
|
# ...
|
|
27
27
|
}
|
|
28
28
|
|
|
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
|
|
|
74
74
|
|
|
75
75
|
## Credits
|
|
76
76
|
|
|
77
|
-
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist
|
|
77
|
+
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist-c](https://github.com/ankane/dist-c) for the quantile function.
|
|
78
78
|
|
|
79
79
|
## References
|
|
80
80
|
|
|
@@ -1,15 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
* AnomalyDetection.cpp v0.
|
|
1
|
+
/*
|
|
2
|
+
* AnomalyDetection.cpp v0.3.0
|
|
3
3
|
* https://github.com/ankane/AnomalyDetection.cpp
|
|
4
4
|
* GPL-3.0-or-later License
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
#pragma once
|
|
8
8
|
|
|
9
|
+
#include <algorithm>
|
|
10
|
+
#include <cmath>
|
|
11
|
+
#include <cstddef>
|
|
9
12
|
#include <functional>
|
|
10
13
|
#include <iostream>
|
|
11
14
|
#include <iterator>
|
|
12
15
|
#include <numeric>
|
|
16
|
+
#include <span>
|
|
17
|
+
#include <stdexcept>
|
|
18
|
+
#include <utility>
|
|
13
19
|
#include <vector>
|
|
14
20
|
|
|
15
21
|
#include "dist.h"
|
|
@@ -17,62 +23,107 @@
|
|
|
17
23
|
|
|
18
24
|
namespace anomaly_detection {
|
|
19
25
|
|
|
20
|
-
|
|
26
|
+
/// The direction to detect anomalies.
|
|
27
|
+
enum class Direction {
|
|
28
|
+
/// Positive direction.
|
|
29
|
+
Positive,
|
|
30
|
+
/// Negative direction.
|
|
31
|
+
Negative,
|
|
32
|
+
/// Both directions.
|
|
33
|
+
Both
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
namespace detail {
|
|
21
37
|
|
|
22
|
-
|
|
23
|
-
|
|
38
|
+
template<typename T>
|
|
39
|
+
T median_sorted(const std::vector<T>& sorted) {
|
|
40
|
+
return (sorted.at((sorted.size() - 1) / 2) + sorted.at(sorted.size() / 2))
|
|
41
|
+
/ static_cast<T>(2.0);
|
|
24
42
|
}
|
|
25
43
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
std::
|
|
44
|
+
template<typename T>
|
|
45
|
+
T median(std::span<const T> data) {
|
|
46
|
+
std::vector<T> sorted(data.begin(), data.end());
|
|
47
|
+
std::ranges::sort(sorted);
|
|
29
48
|
return median_sorted(sorted);
|
|
30
49
|
}
|
|
31
50
|
|
|
32
|
-
|
|
33
|
-
|
|
51
|
+
template<typename T>
|
|
52
|
+
T mad(const std::vector<T>& data, T med) {
|
|
53
|
+
std::vector<T> res;
|
|
34
54
|
res.reserve(data.size());
|
|
35
55
|
for (auto v : data) {
|
|
36
|
-
res.push_back(
|
|
56
|
+
res.push_back(std::abs(v - med));
|
|
37
57
|
}
|
|
38
|
-
std::sort(res
|
|
39
|
-
return 1.4826 * median_sorted(res);
|
|
58
|
+
std::ranges::sort(res);
|
|
59
|
+
return static_cast<T>(1.4826) * median_sorted(res);
|
|
40
60
|
}
|
|
41
61
|
|
|
42
|
-
|
|
43
|
-
|
|
62
|
+
template<typename T>
|
|
63
|
+
std::vector<size_t> detect_anoms(
|
|
64
|
+
std::span<const T> data,
|
|
65
|
+
size_t num_obs_per_period,
|
|
66
|
+
float k,
|
|
67
|
+
float alpha,
|
|
68
|
+
bool one_tail,
|
|
69
|
+
bool upper_tail,
|
|
70
|
+
bool verbose,
|
|
71
|
+
const std::function<void()>& callback
|
|
72
|
+
) {
|
|
73
|
+
size_t n = data.size();
|
|
44
74
|
|
|
45
75
|
// Check that we have at least two periods' worth of data for anomaly context
|
|
46
|
-
if (n < num_obs_per_period
|
|
47
|
-
throw std::invalid_argument
|
|
76
|
+
if (n / 2 < num_obs_per_period) {
|
|
77
|
+
throw std::invalid_argument{"series must contain at least 2 periods"};
|
|
48
78
|
}
|
|
49
79
|
|
|
50
80
|
// Handle NANs
|
|
51
|
-
|
|
52
|
-
if (
|
|
53
|
-
throw std::invalid_argument
|
|
81
|
+
bool nans = std::ranges::any_of(data, [](const auto& value) { return std::isnan(value); });
|
|
82
|
+
if (nans) {
|
|
83
|
+
throw std::invalid_argument{"series contains NANs"};
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (k < 0) {
|
|
87
|
+
throw std::invalid_argument{"max_anoms must be non-negative"};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (k >= 0.5) {
|
|
91
|
+
throw std::invalid_argument{"max_anoms must be less than 50% of the data points"};
|
|
54
92
|
}
|
|
55
93
|
|
|
56
|
-
|
|
94
|
+
if (alpha < 0) {
|
|
95
|
+
throw std::invalid_argument{"alpha must be non-negative"};
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (alpha > 0.5) {
|
|
99
|
+
throw std::invalid_argument{"alpha must not be greater than 0.5"};
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
std::vector<T> data2;
|
|
57
103
|
data2.reserve(n);
|
|
58
|
-
|
|
104
|
+
T med = median(data);
|
|
59
105
|
|
|
60
106
|
if (num_obs_per_period > 1) {
|
|
61
107
|
// Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
108
|
+
stl::Stl data_decomp{
|
|
109
|
+
data, num_obs_per_period, {.seasonal_length = data.size() * 10 + 1, .robust = true}
|
|
110
|
+
};
|
|
111
|
+
const std::vector<T>& seasonal = data_decomp.seasonal();
|
|
112
|
+
|
|
113
|
+
// TODO use std::views::zip for C++23
|
|
114
|
+
size_t i = 0;
|
|
115
|
+
for (auto v : data) {
|
|
116
|
+
data2.push_back(v - seasonal.at(i) - med);
|
|
117
|
+
i++;
|
|
67
118
|
}
|
|
68
119
|
} else {
|
|
69
|
-
for (
|
|
70
|
-
data2.push_back(
|
|
120
|
+
for (auto v : data) {
|
|
121
|
+
data2.push_back(v - med);
|
|
71
122
|
}
|
|
72
123
|
}
|
|
73
124
|
|
|
74
|
-
|
|
75
|
-
auto max_outliers = (
|
|
125
|
+
size_t num_anoms = 0;
|
|
126
|
+
auto max_outliers = static_cast<size_t>(static_cast<float>(n) * k);
|
|
76
127
|
std::vector<size_t> anomalies;
|
|
77
128
|
anomalies.reserve(max_outliers);
|
|
78
129
|
|
|
@@ -80,18 +131,20 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
80
131
|
// Use stable sort for indexes for deterministic results
|
|
81
132
|
std::vector<size_t> indexes(n);
|
|
82
133
|
std::iota(indexes.begin(), indexes.end(), 0);
|
|
83
|
-
std::stable_sort(indexes
|
|
84
|
-
|
|
134
|
+
std::ranges::stable_sort(indexes, [&data2](size_t a, size_t b) {
|
|
135
|
+
return data2.at(a) < data2.at(b);
|
|
136
|
+
});
|
|
137
|
+
std::ranges::sort(data2);
|
|
85
138
|
|
|
86
139
|
// Compute test statistic until r=max_outliers values have been removed from the sample
|
|
87
|
-
for (
|
|
140
|
+
for (size_t i = 1; i <= max_outliers; i++) {
|
|
88
141
|
if (verbose) {
|
|
89
142
|
std::cout << i << " / " << max_outliers << " completed" << std::endl;
|
|
90
143
|
}
|
|
91
144
|
|
|
92
145
|
// TODO Improve performance between loop iterations
|
|
93
|
-
|
|
94
|
-
std::vector<
|
|
146
|
+
T ma = median_sorted(data2);
|
|
147
|
+
std::vector<T> ares;
|
|
95
148
|
ares.reserve(data2.size());
|
|
96
149
|
if (one_tail) {
|
|
97
150
|
if (upper_tail) {
|
|
@@ -105,36 +158,34 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
105
158
|
}
|
|
106
159
|
} else {
|
|
107
160
|
for (auto v : data2) {
|
|
108
|
-
ares.push_back(
|
|
161
|
+
ares.push_back(std::abs(v - ma));
|
|
109
162
|
}
|
|
110
163
|
}
|
|
111
164
|
|
|
112
165
|
// Protect against constant time series
|
|
113
|
-
|
|
166
|
+
T data_sigma = mad(data2, ma);
|
|
114
167
|
if (data_sigma == 0.0) {
|
|
115
168
|
break;
|
|
116
169
|
}
|
|
117
170
|
|
|
118
|
-
auto iter = std::max_element(ares
|
|
119
|
-
|
|
171
|
+
auto iter = std::ranges::max_element(ares);
|
|
172
|
+
ptrdiff_t r_idx_i = std::distance(ares.begin(), iter);
|
|
120
173
|
|
|
121
174
|
// Only need to take sigma of r for performance
|
|
122
|
-
|
|
175
|
+
T r = ares.at(static_cast<size_t>(r_idx_i)) / data_sigma;
|
|
123
176
|
|
|
124
|
-
anomalies.push_back(indexes
|
|
177
|
+
anomalies.push_back(indexes.at(static_cast<size_t>(r_idx_i)));
|
|
125
178
|
data2.erase(data2.begin() + r_idx_i);
|
|
126
179
|
indexes.erase(indexes.begin() + r_idx_i);
|
|
127
180
|
|
|
128
181
|
// Compute critical value
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
} else {
|
|
133
|
-
p = 1.0 - alpha / (2.0 * (n - i + 1));
|
|
134
|
-
}
|
|
182
|
+
double p = one_tail
|
|
183
|
+
? (1.0 - alpha / static_cast<double>(n - i + 1))
|
|
184
|
+
: (1.0 - alpha / (2.0 * static_cast<double>(n - i + 1)));
|
|
135
185
|
|
|
136
|
-
|
|
137
|
-
|
|
186
|
+
double t = students_t_ppf(p, static_cast<double>(n - i - 1));
|
|
187
|
+
double lam = t * static_cast<double>(n - i)
|
|
188
|
+
/ std::sqrt((static_cast<double>(n - i - 1) + t * t) * static_cast<double>(n - i + 1));
|
|
138
189
|
|
|
139
190
|
if (r > lam) {
|
|
140
191
|
num_anoms = i;
|
|
@@ -148,63 +199,69 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
148
199
|
anomalies.resize(num_anoms);
|
|
149
200
|
|
|
150
201
|
// Sort like R version
|
|
151
|
-
std::sort(anomalies
|
|
202
|
+
std::ranges::sort(anomalies);
|
|
152
203
|
|
|
153
204
|
return anomalies;
|
|
154
205
|
}
|
|
155
206
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
float
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
this->alpha_ = alpha;
|
|
171
|
-
return *this;
|
|
172
|
-
};
|
|
173
|
-
|
|
174
|
-
inline AnomalyDetectionParams max_anoms(float max_anoms) {
|
|
175
|
-
this->max_anoms_ = max_anoms;
|
|
176
|
-
return *this;
|
|
177
|
-
};
|
|
178
|
-
|
|
179
|
-
inline AnomalyDetectionParams direction(Direction direction) {
|
|
180
|
-
this->direction_ = direction;
|
|
181
|
-
return *this;
|
|
182
|
-
};
|
|
183
|
-
|
|
184
|
-
inline AnomalyDetectionParams verbose(bool verbose) {
|
|
185
|
-
this->verbose_ = verbose;
|
|
186
|
-
return *this;
|
|
187
|
-
};
|
|
188
|
-
|
|
189
|
-
inline AnomalyDetectionParams callback(std::function<void()> callback) {
|
|
190
|
-
this->callback_ = callback;
|
|
191
|
-
return *this;
|
|
192
|
-
};
|
|
193
|
-
|
|
194
|
-
AnomalyDetectionResult fit(const std::vector<float>& series, size_t period);
|
|
207
|
+
} // namespace detail
|
|
208
|
+
|
|
209
|
+
/// A set of anomaly detection parameters.
|
|
210
|
+
struct AnomalyDetectionParams {
|
|
211
|
+
/// Sets the level of statistical significance.
|
|
212
|
+
float alpha = 0.05f;
|
|
213
|
+
/// Sets the maximum number of anomalies as a fraction of the data points (0.1 means 10%; must be less than 0.5).
|
|
214
|
+
float max_anoms = 0.1f;
|
|
215
|
+
/// Sets the direction.
|
|
216
|
+
Direction direction = Direction::Both;
|
|
217
|
+
/// Sets whether to show progress.
|
|
218
|
+
bool verbose = false;
|
|
219
|
+
/// Sets a callback for each iteration.
|
|
220
|
+
std::function<void()> callback = nullptr;
|
|
195
221
|
};
|
|
196
222
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
223
|
+
/// An anomaly detection result.
|
|
224
|
+
class AnomalyDetection {
|
|
225
|
+
public:
|
|
226
|
+
/// Detects anomalies in a time series from a span.
|
|
227
|
+
template<typename T>
|
|
228
|
+
AnomalyDetection(
|
|
229
|
+
std::span<const T> series,
|
|
230
|
+
size_t period,
|
|
231
|
+
const AnomalyDetectionParams& params = AnomalyDetectionParams()
|
|
232
|
+
) {
|
|
233
|
+
bool one_tail = params.direction != Direction::Both;
|
|
234
|
+
bool upper_tail = params.direction == Direction::Positive;
|
|
235
|
+
|
|
236
|
+
std::vector<size_t> anomalies = detail::detect_anoms(
|
|
237
|
+
series,
|
|
238
|
+
period,
|
|
239
|
+
params.max_anoms,
|
|
240
|
+
params.alpha,
|
|
241
|
+
one_tail,
|
|
242
|
+
upper_tail,
|
|
243
|
+
params.verbose,
|
|
244
|
+
params.callback
|
|
245
|
+
);
|
|
246
|
+
anomalies_ = std::move(anomalies);
|
|
247
|
+
}
|
|
200
248
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
249
|
+
/// Detects anomalies in a time series from a vector.
|
|
250
|
+
template<typename T>
|
|
251
|
+
AnomalyDetection(
|
|
252
|
+
const std::vector<T>& series,
|
|
253
|
+
size_t period,
|
|
254
|
+
const AnomalyDetectionParams& params = AnomalyDetectionParams()
|
|
255
|
+
) :
|
|
256
|
+
AnomalyDetection(std::span<const T>{series}, period, params) {}
|
|
257
|
+
|
|
258
|
+
/// Returns the anomalies.
|
|
259
|
+
const std::vector<size_t>& anomalies() const {
|
|
260
|
+
return anomalies_;
|
|
261
|
+
}
|
|
204
262
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
}
|
|
263
|
+
private:
|
|
264
|
+
std::vector<size_t> anomalies_;
|
|
265
|
+
};
|
|
209
266
|
|
|
210
|
-
}
|
|
267
|
+
} // namespace anomaly_detection
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* Dist C v0.3.1
|
|
3
3
|
* https://github.com/ankane/dist.h
|
|
4
4
|
* Unlicense OR MIT License
|
|
5
5
|
*/
|
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
#include <math.h>
|
|
10
10
|
|
|
11
|
+
/// @cond
|
|
12
|
+
|
|
11
13
|
#ifdef M_E
|
|
12
14
|
#define DIST_E M_E
|
|
13
15
|
#else
|
|
@@ -26,7 +28,14 @@
|
|
|
26
28
|
#define DIST_SQRT2 1.41421356237309504880
|
|
27
29
|
#endif
|
|
28
30
|
|
|
29
|
-
|
|
31
|
+
/// @endcond
|
|
32
|
+
|
|
33
|
+
#ifdef __cplusplus
|
|
34
|
+
extern "C" {
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
/// Returns the probability density function (PDF) of the normal distribution.
|
|
38
|
+
static inline double normal_pdf(double x, double mean, double std_dev) {
|
|
30
39
|
if (std_dev <= 0) {
|
|
31
40
|
return NAN;
|
|
32
41
|
}
|
|
@@ -35,7 +44,8 @@ double normal_pdf(double x, double mean, double std_dev) {
|
|
|
35
44
|
return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
|
|
36
45
|
}
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
/// Returns the cumulative distribution function (CDF) of the normal distribution.
|
|
48
|
+
static inline double normal_cdf(double x, double mean, double std_dev) {
|
|
39
49
|
if (std_dev <= 0) {
|
|
40
50
|
return NAN;
|
|
41
51
|
}
|
|
@@ -43,10 +53,11 @@ double normal_cdf(double x, double mean, double std_dev) {
|
|
|
43
53
|
return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
|
|
44
54
|
}
|
|
45
55
|
|
|
56
|
+
/// Returns the percent-point/quantile function (PPF) of the normal distribution.
|
|
46
57
|
// Wichura, M. J. (1988).
|
|
47
58
|
// Algorithm AS 241: The Percentage Points of the Normal Distribution.
|
|
48
59
|
// Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
|
|
49
|
-
double normal_ppf(double p, double mean, double std_dev) {
|
|
60
|
+
static inline double normal_ppf(double p, double mean, double std_dev) {
|
|
50
61
|
if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
|
|
51
62
|
return NAN;
|
|
52
63
|
}
|
|
@@ -83,7 +94,8 @@ double normal_ppf(double p, double mean, double std_dev) {
|
|
|
83
94
|
}
|
|
84
95
|
}
|
|
85
96
|
|
|
86
|
-
|
|
97
|
+
/// Returns the probability density function (PDF) of the Student's t distribution.
|
|
98
|
+
static inline double students_t_pdf(double x, double n) {
|
|
87
99
|
if (n <= 0) {
|
|
88
100
|
return NAN;
|
|
89
101
|
}
|
|
@@ -95,10 +107,11 @@ double students_t_pdf(double x, double n) {
|
|
|
95
107
|
return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
|
|
96
108
|
}
|
|
97
109
|
|
|
110
|
+
/// Returns the cumulative distribution function (CDF) of the Student's t distribution.
|
|
98
111
|
// Hill, G. W. (1970).
|
|
99
112
|
// Algorithm 395: Student's t-distribution.
|
|
100
113
|
// Communications of the ACM, 13(10), 617-619.
|
|
101
|
-
double students_t_cdf(double x, double n) {
|
|
114
|
+
static inline double students_t_cdf(double x, double n) {
|
|
102
115
|
if (n < 1) {
|
|
103
116
|
return NAN;
|
|
104
117
|
}
|
|
@@ -159,7 +172,7 @@ double students_t_cdf(double x, double n) {
|
|
|
159
172
|
return start + sign * (z - a) / 2;
|
|
160
173
|
}
|
|
161
174
|
|
|
162
|
-
// tail series
|
|
175
|
+
// tail series expansion for large t-values
|
|
163
176
|
double a = sqrt(b);
|
|
164
177
|
y = a * n;
|
|
165
178
|
int j = 0;
|
|
@@ -182,10 +195,11 @@ double students_t_cdf(double x, double n) {
|
|
|
182
195
|
return start + sign * (z - a) / 2;
|
|
183
196
|
}
|
|
184
197
|
|
|
198
|
+
/// Returns the percent-point/quantile function (PPF) of the Student's t distribution.
|
|
185
199
|
// Hill, G. W. (1970).
|
|
186
200
|
// Algorithm 396: Student's t-quantiles.
|
|
187
201
|
// Communications of the ACM, 13(10), 619-620.
|
|
188
|
-
double students_t_ppf(double p, double n) {
|
|
202
|
+
static inline double students_t_ppf(double p, double n) {
|
|
189
203
|
if (p < 0 || p > 1 || n < 1) {
|
|
190
204
|
return NAN;
|
|
191
205
|
}
|
|
@@ -234,3 +248,11 @@ double students_t_ppf(double p, double n) {
|
|
|
234
248
|
}
|
|
235
249
|
return sign * sqrt(n * y);
|
|
236
250
|
}
|
|
251
|
+
|
|
252
|
+
#ifdef __cplusplus
|
|
253
|
+
}
|
|
254
|
+
#endif
|
|
255
|
+
|
|
256
|
+
#undef DIST_E
|
|
257
|
+
#undef DIST_PI
|
|
258
|
+
#undef DIST_SQRT2
|
|
@@ -1,18 +1,27 @@
|
|
|
1
|
+
#include <cstddef>
|
|
2
|
+
#include <stdexcept>
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
1
6
|
#include <rice/rice.hpp>
|
|
2
|
-
#include <rice/stl.hpp>
|
|
3
7
|
|
|
4
8
|
#include "anomaly_detection.hpp"
|
|
5
9
|
|
|
10
|
+
using anomaly_detection::AnomalyDetection;
|
|
11
|
+
using anomaly_detection::AnomalyDetectionParams;
|
|
6
12
|
using anomaly_detection::Direction;
|
|
7
13
|
|
|
8
14
|
extern "C"
|
|
9
15
|
void Init_ext() {
|
|
10
|
-
|
|
16
|
+
Rice::Module rb_mAnomalyDetection = Rice::define_module("AnomalyDetection");
|
|
11
17
|
|
|
12
18
|
rb_mAnomalyDetection
|
|
13
19
|
.define_singleton_function(
|
|
14
20
|
"_detect",
|
|
15
|
-
[](
|
|
21
|
+
[](Rice::Array rb_series, size_t period, float k, float alpha, Rice::String rb_direction, bool verbose) {
|
|
22
|
+
std::vector<float> series = rb_series.to_vector<float>();
|
|
23
|
+
std::string direction = rb_direction.str();
|
|
24
|
+
|
|
16
25
|
Direction dir;
|
|
17
26
|
if (direction == "pos") {
|
|
18
27
|
dir = Direction::Positive;
|
|
@@ -24,17 +33,18 @@ void Init_ext() {
|
|
|
24
33
|
throw std::invalid_argument("direction must be pos, neg, or both");
|
|
25
34
|
}
|
|
26
35
|
|
|
27
|
-
|
|
28
|
-
.
|
|
29
|
-
.
|
|
30
|
-
.direction
|
|
31
|
-
.verbose
|
|
32
|
-
.callback
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
AnomalyDetectionParams params{
|
|
37
|
+
.alpha = alpha,
|
|
38
|
+
.max_anoms = k,
|
|
39
|
+
.direction = dir,
|
|
40
|
+
.verbose = verbose,
|
|
41
|
+
.callback = rb_thread_check_ints
|
|
42
|
+
};
|
|
43
|
+
AnomalyDetection res{series, period, params};
|
|
44
|
+
|
|
45
|
+
Rice::Array a;
|
|
46
|
+
for (const auto v : res.anomalies()) {
|
|
47
|
+
a.push(v, false);
|
|
38
48
|
}
|
|
39
49
|
return a;
|
|
40
50
|
});
|