anomaly_detection 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/NOTICE.txt +1 -1
- data/README.md +1 -1
- data/ext/anomaly_detection/anomaly_detection.hpp +124 -113
- data/ext/anomaly_detection/dist.h +31 -9
- data/ext/anomaly_detection/ext.cpp +19 -12
- data/ext/anomaly_detection/extconf.rb +1 -1
- data/ext/anomaly_detection/stl.hpp +502 -426
- data/lib/anomaly_detection/version.rb +1 -1
- data/licenses/LICENSE-MIT-dist-h.txt +1 -1
- data/licenses/LICENSE-MIT-stl-cpp.txt +1 -1
- data/licenses/NOTICE-AnomalyDetection-cpp.txt +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b7e239fb8a7cef345a877f6f9ad0d980f820f35a89b66248d48c6350a92ce750
|
|
4
|
+
data.tar.gz: 617ce11f0df37dc0033b120ec77a99d719bb9ba3254097cbfb38e420b6a8398f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bde266b02df66b998313177272f2d24d749442a96f7610ea57c70e057e3aaa723248fcee8e487839f15ba078bd3ece933ecf00b803f5790b4799a9dbd909045b
|
|
7
|
+
data.tar.gz: 56873cb20e990426e03871320b4217a50d6f2ff446ac0e848f627c9aa2a8ab2e8b0f8ee6c7084f08c5795930250232e4ff0282b60a9c200fb4c5a099fc6a6033
|
data/CHANGELOG.md
CHANGED
data/NOTICE.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
Copyright (C) 2015 Twitter, Inc and other contributors
|
|
2
|
-
Copyright (C) 2021-
|
|
2
|
+
Copyright (C) 2021-2026 Andrew Kane
|
|
3
3
|
|
|
4
4
|
This program is free software: you can redistribute it and/or modify
|
|
5
5
|
it under the terms of the GNU General Public License as published by
|
data/README.md
CHANGED
|
@@ -74,7 +74,7 @@ AnomalyDetection.plot(series, anomalies)
|
|
|
74
74
|
|
|
75
75
|
## Credits
|
|
76
76
|
|
|
77
|
-
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist
|
|
77
|
+
This library was ported from the [AnomalyDetection](https://github.com/twitter/AnomalyDetection) R package and is available under the same license. It uses [stl-cpp](https://github.com/ankane/stl-cpp) for seasonal-trend decomposition and [dist-c](https://github.com/ankane/dist-c) for the quantile function.
|
|
78
78
|
|
|
79
79
|
## References
|
|
80
80
|
|
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
|
|
2
|
-
* AnomalyDetection.cpp v0.
|
|
1
|
+
/*
|
|
2
|
+
* AnomalyDetection.cpp v0.3.0
|
|
3
3
|
* https://github.com/ankane/AnomalyDetection.cpp
|
|
4
4
|
* GPL-3.0-or-later License
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
#pragma once
|
|
8
8
|
|
|
9
|
+
#include <algorithm>
|
|
10
|
+
#include <cmath>
|
|
11
|
+
#include <cstddef>
|
|
9
12
|
#include <functional>
|
|
10
13
|
#include <iostream>
|
|
11
14
|
#include <iterator>
|
|
12
15
|
#include <numeric>
|
|
13
|
-
#include <vector>
|
|
14
|
-
|
|
15
|
-
#if __cplusplus >= 202002L
|
|
16
16
|
#include <span>
|
|
17
|
-
#
|
|
17
|
+
#include <stdexcept>
|
|
18
|
+
#include <utility>
|
|
19
|
+
#include <vector>
|
|
18
20
|
|
|
19
21
|
#include "dist.h"
|
|
20
22
|
#include "stl.hpp"
|
|
@@ -31,17 +33,18 @@ enum class Direction {
|
|
|
31
33
|
Both
|
|
32
34
|
};
|
|
33
35
|
|
|
34
|
-
namespace {
|
|
36
|
+
namespace detail {
|
|
35
37
|
|
|
36
38
|
template<typename T>
|
|
37
39
|
T median_sorted(const std::vector<T>& sorted) {
|
|
38
|
-
return (sorted
|
|
40
|
+
return (sorted.at((sorted.size() - 1) / 2) + sorted.at(sorted.size() / 2))
|
|
41
|
+
/ static_cast<T>(2.0);
|
|
39
42
|
}
|
|
40
43
|
|
|
41
44
|
template<typename T>
|
|
42
|
-
T median(const T
|
|
43
|
-
std::vector<T> sorted(data, data
|
|
44
|
-
std::sort(sorted
|
|
45
|
+
T median(std::span<const T> data) {
|
|
46
|
+
std::vector<T> sorted(data.begin(), data.end());
|
|
47
|
+
std::ranges::sort(sorted);
|
|
45
48
|
return median_sorted(sorted);
|
|
46
49
|
}
|
|
47
50
|
|
|
@@ -52,47 +55,75 @@ T mad(const std::vector<T>& data, T med) {
|
|
|
52
55
|
for (auto v : data) {
|
|
53
56
|
res.push_back(std::abs(v - med));
|
|
54
57
|
}
|
|
55
|
-
std::sort(res
|
|
56
|
-
return 1.4826 * median_sorted(res);
|
|
58
|
+
std::ranges::sort(res);
|
|
59
|
+
return static_cast<T>(1.4826) * median_sorted(res);
|
|
57
60
|
}
|
|
58
61
|
|
|
59
62
|
template<typename T>
|
|
60
|
-
std::vector<size_t> detect_anoms(
|
|
61
|
-
|
|
63
|
+
std::vector<size_t> detect_anoms(
|
|
64
|
+
std::span<const T> data,
|
|
65
|
+
size_t num_obs_per_period,
|
|
66
|
+
float k,
|
|
67
|
+
float alpha,
|
|
68
|
+
bool one_tail,
|
|
69
|
+
bool upper_tail,
|
|
70
|
+
bool verbose,
|
|
71
|
+
const std::function<void()>& callback
|
|
72
|
+
) {
|
|
73
|
+
size_t n = data.size();
|
|
62
74
|
|
|
63
75
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
|
64
|
-
if (n < num_obs_per_period
|
|
65
|
-
throw std::invalid_argument
|
|
76
|
+
if (n / 2 < num_obs_per_period) {
|
|
77
|
+
throw std::invalid_argument{"series must contain at least 2 periods"};
|
|
66
78
|
}
|
|
67
79
|
|
|
68
80
|
// Handle NANs
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
81
|
+
bool nans = std::ranges::any_of(data, [](const auto& value) { return std::isnan(value); });
|
|
82
|
+
if (nans) {
|
|
83
|
+
throw std::invalid_argument{"series contains NANs"};
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (k < 0) {
|
|
87
|
+
throw std::invalid_argument{"max_anoms must be non-negative"};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (k >= 0.5) {
|
|
91
|
+
throw std::invalid_argument{"max_anoms must be less than 50% of the data points"};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (alpha < 0) {
|
|
95
|
+
throw std::invalid_argument{"alpha must be non-negative"};
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (alpha > 0.5) {
|
|
99
|
+
throw std::invalid_argument{"alpha must not be greater than 0.5"};
|
|
74
100
|
}
|
|
75
101
|
|
|
76
102
|
std::vector<T> data2;
|
|
77
103
|
data2.reserve(n);
|
|
78
|
-
|
|
104
|
+
T med = median(data);
|
|
79
105
|
|
|
80
106
|
if (num_obs_per_period > 1) {
|
|
81
107
|
// Decompose data. This returns a univarite remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
108
|
+
stl::Stl data_decomp{
|
|
109
|
+
data, num_obs_per_period, {.seasonal_length = data.size() * 10 + 1, .robust = true}
|
|
110
|
+
};
|
|
111
|
+
const std::vector<T>& seasonal = data_decomp.seasonal();
|
|
112
|
+
|
|
113
|
+
// TODO use std::views::zip for C++23
|
|
114
|
+
size_t i = 0;
|
|
115
|
+
for (auto v : data) {
|
|
116
|
+
data2.push_back(v - seasonal.at(i) - med);
|
|
117
|
+
i++;
|
|
87
118
|
}
|
|
88
119
|
} else {
|
|
89
|
-
for (
|
|
90
|
-
data2.push_back(
|
|
120
|
+
for (auto v : data) {
|
|
121
|
+
data2.push_back(v - med);
|
|
91
122
|
}
|
|
92
123
|
}
|
|
93
124
|
|
|
94
|
-
|
|
95
|
-
auto max_outliers = (
|
|
125
|
+
size_t num_anoms = 0;
|
|
126
|
+
auto max_outliers = static_cast<size_t>(static_cast<float>(n) * k);
|
|
96
127
|
std::vector<size_t> anomalies;
|
|
97
128
|
anomalies.reserve(max_outliers);
|
|
98
129
|
|
|
@@ -100,19 +131,19 @@ std::vector<size_t> detect_anoms(const T* data, size_t data_size, size_t num_obs
|
|
|
100
131
|
// Use stable sort for indexes for deterministic results
|
|
101
132
|
std::vector<size_t> indexes(n);
|
|
102
133
|
std::iota(indexes.begin(), indexes.end(), 0);
|
|
103
|
-
std::stable_sort(indexes
|
|
104
|
-
return data2
|
|
134
|
+
std::ranges::stable_sort(indexes, [&data2](size_t a, size_t b) {
|
|
135
|
+
return data2.at(a) < data2.at(b);
|
|
105
136
|
});
|
|
106
|
-
std::sort(data2
|
|
137
|
+
std::ranges::sort(data2);
|
|
107
138
|
|
|
108
139
|
// Compute test statistic until r=max_outliers values have been removed from the sample
|
|
109
|
-
for (
|
|
140
|
+
for (size_t i = 1; i <= max_outliers; i++) {
|
|
110
141
|
if (verbose) {
|
|
111
142
|
std::cout << i << " / " << max_outliers << " completed" << std::endl;
|
|
112
143
|
}
|
|
113
144
|
|
|
114
145
|
// TODO Improve performance between loop iterations
|
|
115
|
-
|
|
146
|
+
T ma = median_sorted(data2);
|
|
116
147
|
std::vector<T> ares;
|
|
117
148
|
ares.reserve(data2.size());
|
|
118
149
|
if (one_tail) {
|
|
@@ -132,31 +163,29 @@ std::vector<size_t> detect_anoms(const T* data, size_t data_size, size_t num_obs
|
|
|
132
163
|
}
|
|
133
164
|
|
|
134
165
|
// Protect against constant time series
|
|
135
|
-
|
|
166
|
+
T data_sigma = mad(data2, ma);
|
|
136
167
|
if (data_sigma == 0.0) {
|
|
137
168
|
break;
|
|
138
169
|
}
|
|
139
170
|
|
|
140
|
-
auto iter = std::max_element(ares
|
|
141
|
-
|
|
171
|
+
auto iter = std::ranges::max_element(ares);
|
|
172
|
+
ptrdiff_t r_idx_i = std::distance(ares.begin(), iter);
|
|
142
173
|
|
|
143
174
|
// Only need to take sigma of r for performance
|
|
144
|
-
|
|
175
|
+
T r = ares.at(static_cast<size_t>(r_idx_i)) / data_sigma;
|
|
145
176
|
|
|
146
|
-
anomalies.push_back(indexes
|
|
177
|
+
anomalies.push_back(indexes.at(static_cast<size_t>(r_idx_i)));
|
|
147
178
|
data2.erase(data2.begin() + r_idx_i);
|
|
148
179
|
indexes.erase(indexes.begin() + r_idx_i);
|
|
149
180
|
|
|
150
181
|
// Compute critical value
|
|
151
|
-
double p
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
} else {
|
|
155
|
-
p = 1.0 - alpha / (2.0 * (n - i + 1));
|
|
156
|
-
}
|
|
182
|
+
double p = one_tail
|
|
183
|
+
? (1.0 - alpha / static_cast<double>(n - i + 1))
|
|
184
|
+
: (1.0 - alpha / (2.0 * static_cast<double>(n - i + 1)));
|
|
157
185
|
|
|
158
|
-
|
|
159
|
-
|
|
186
|
+
double t = students_t_ppf(p, static_cast<double>(n - i - 1));
|
|
187
|
+
double lam = t * static_cast<double>(n - i)
|
|
188
|
+
/ std::sqrt((static_cast<double>(n - i - 1) + t * t) * static_cast<double>(n - i + 1));
|
|
160
189
|
|
|
161
190
|
if (r > lam) {
|
|
162
191
|
num_anoms = i;
|
|
@@ -170,87 +199,69 @@ std::vector<size_t> detect_anoms(const T* data, size_t data_size, size_t num_obs
|
|
|
170
199
|
anomalies.resize(num_anoms);
|
|
171
200
|
|
|
172
201
|
// Sort like R version
|
|
173
|
-
std::sort(anomalies
|
|
202
|
+
std::ranges::sort(anomalies);
|
|
174
203
|
|
|
175
204
|
return anomalies;
|
|
176
205
|
}
|
|
177
206
|
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
/// An anomaly detection result.
|
|
181
|
-
class AnomalyDetectionResult {
|
|
182
|
-
public:
|
|
183
|
-
/// Returns the anomalies.
|
|
184
|
-
std::vector<size_t> anomalies;
|
|
185
|
-
};
|
|
207
|
+
} // namespace detail
|
|
186
208
|
|
|
187
209
|
/// A set of anomaly detection parameters.
|
|
188
|
-
|
|
189
|
-
float alpha_ = 0.05;
|
|
190
|
-
float max_anoms_ = 0.1;
|
|
191
|
-
Direction direction_ = Direction::Both;
|
|
192
|
-
bool verbose_ = false;
|
|
193
|
-
std::function<void()> callback_ = nullptr;
|
|
194
|
-
|
|
195
|
-
public:
|
|
210
|
+
struct AnomalyDetectionParams {
|
|
196
211
|
/// Sets the level of statistical significance.
|
|
197
|
-
|
|
198
|
-
this->alpha_ = alpha;
|
|
199
|
-
return *this;
|
|
200
|
-
};
|
|
201
|
-
|
|
212
|
+
float alpha = 0.05f;
|
|
202
213
|
/// Sets the maximum number of anomalies as percent of data.
|
|
203
|
-
|
|
204
|
-
this->max_anoms_ = max_anoms;
|
|
205
|
-
return *this;
|
|
206
|
-
};
|
|
207
|
-
|
|
214
|
+
float max_anoms = 0.1f;
|
|
208
215
|
/// Sets the direction.
|
|
209
|
-
|
|
210
|
-
this->direction_ = direction;
|
|
211
|
-
return *this;
|
|
212
|
-
};
|
|
213
|
-
|
|
216
|
+
Direction direction = Direction::Both;
|
|
214
217
|
/// Sets whether to show progress.
|
|
215
|
-
|
|
216
|
-
this->verbose_ = verbose;
|
|
217
|
-
return *this;
|
|
218
|
-
};
|
|
219
|
-
|
|
218
|
+
bool verbose = false;
|
|
220
219
|
/// Sets a callback for each iteration.
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
return *this;
|
|
224
|
-
};
|
|
220
|
+
std::function<void()> callback = nullptr;
|
|
221
|
+
};
|
|
225
222
|
|
|
226
|
-
|
|
223
|
+
/// An anomaly detection result.
|
|
224
|
+
class AnomalyDetection {
|
|
225
|
+
public:
|
|
226
|
+
/// Detects anomalies in a time series from a span.
|
|
227
227
|
template<typename T>
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
228
|
+
AnomalyDetection(
|
|
229
|
+
std::span<const T> series,
|
|
230
|
+
size_t period,
|
|
231
|
+
const AnomalyDetectionParams& params = AnomalyDetectionParams()
|
|
232
|
+
) {
|
|
233
|
+
bool one_tail = params.direction != Direction::Both;
|
|
234
|
+
bool upper_tail = params.direction == Direction::Positive;
|
|
235
|
+
|
|
236
|
+
std::vector<size_t> anomalies = detail::detect_anoms(
|
|
237
|
+
series,
|
|
238
|
+
period,
|
|
239
|
+
params.max_anoms,
|
|
240
|
+
params.alpha,
|
|
241
|
+
one_tail,
|
|
242
|
+
upper_tail,
|
|
243
|
+
params.verbose,
|
|
244
|
+
params.callback
|
|
245
|
+
);
|
|
246
|
+
anomalies_ = std::move(anomalies);
|
|
234
247
|
}
|
|
235
248
|
|
|
236
249
|
/// Detects anomalies in a time series from a vector.
|
|
237
250
|
template<typename T>
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
251
|
+
AnomalyDetection(
|
|
252
|
+
const std::vector<T>& series,
|
|
253
|
+
size_t period,
|
|
254
|
+
const AnomalyDetectionParams& params = AnomalyDetectionParams()
|
|
255
|
+
) :
|
|
256
|
+
AnomalyDetection(std::span<const T>{series}, period, params) {}
|
|
241
257
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
inline AnomalyDetectionResult fit(std::span<const T> series, size_t period) const {
|
|
246
|
-
return fit(series.data(), series.size(), period);
|
|
258
|
+
/// Returns the anomalies.
|
|
259
|
+
const std::vector<size_t>& anomalies() const {
|
|
260
|
+
return anomalies_;
|
|
247
261
|
}
|
|
248
|
-
#endif
|
|
249
|
-
};
|
|
250
262
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
}
|
|
263
|
+
private:
|
|
264
|
+
std::vector<size_t> anomalies_;
|
|
265
|
+
};
|
|
255
266
|
|
|
256
|
-
}
|
|
267
|
+
} // namespace anomaly_detection
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* Dist C v0.3.1
|
|
3
3
|
* https://github.com/ankane/dist.h
|
|
4
4
|
* Unlicense OR MIT License
|
|
5
5
|
*/
|
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
#include <math.h>
|
|
10
10
|
|
|
11
|
+
/// @cond
|
|
12
|
+
|
|
11
13
|
#ifdef M_E
|
|
12
14
|
#define DIST_E M_E
|
|
13
15
|
#else
|
|
@@ -26,7 +28,14 @@
|
|
|
26
28
|
#define DIST_SQRT2 1.41421356237309504880
|
|
27
29
|
#endif
|
|
28
30
|
|
|
29
|
-
|
|
31
|
+
/// @endcond
|
|
32
|
+
|
|
33
|
+
#ifdef __cplusplus
|
|
34
|
+
extern "C" {
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
/// Returns the probability density function (PDF) of the normal distribution.
|
|
38
|
+
static inline double normal_pdf(double x, double mean, double std_dev) {
|
|
30
39
|
if (std_dev <= 0) {
|
|
31
40
|
return NAN;
|
|
32
41
|
}
|
|
@@ -35,7 +44,8 @@ double normal_pdf(double x, double mean, double std_dev) {
|
|
|
35
44
|
return (1.0 / (std_dev * sqrt(2.0 * DIST_PI))) * pow(DIST_E, -0.5 * n * n);
|
|
36
45
|
}
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
/// Returns the cumulative distribution function (CDF) of the normal distribution.
|
|
48
|
+
static inline double normal_cdf(double x, double mean, double std_dev) {
|
|
39
49
|
if (std_dev <= 0) {
|
|
40
50
|
return NAN;
|
|
41
51
|
}
|
|
@@ -43,10 +53,11 @@ double normal_cdf(double x, double mean, double std_dev) {
|
|
|
43
53
|
return 0.5 * (1.0 + erf((x - mean) / (std_dev * DIST_SQRT2)));
|
|
44
54
|
}
|
|
45
55
|
|
|
56
|
+
/// Returns the percent-point/quantile function (PPF) of the normal distribution.
|
|
46
57
|
// Wichura, M. J. (1988).
|
|
47
58
|
// Algorithm AS 241: The Percentage Points of the Normal Distribution.
|
|
48
59
|
// Journal of the Royal Statistical Society. Series C (Applied Statistics), 37(3), 477-484.
|
|
49
|
-
double normal_ppf(double p, double mean, double std_dev) {
|
|
60
|
+
static inline double normal_ppf(double p, double mean, double std_dev) {
|
|
50
61
|
if (p < 0 || p > 1 || std_dev <= 0 || isnan(mean) || isnan(std_dev)) {
|
|
51
62
|
return NAN;
|
|
52
63
|
}
|
|
@@ -83,7 +94,8 @@ double normal_ppf(double p, double mean, double std_dev) {
|
|
|
83
94
|
}
|
|
84
95
|
}
|
|
85
96
|
|
|
86
|
-
|
|
97
|
+
/// Returns the probability density function (PDF) of the Student's t distribution.
|
|
98
|
+
static inline double students_t_pdf(double x, double n) {
|
|
87
99
|
if (n <= 0) {
|
|
88
100
|
return NAN;
|
|
89
101
|
}
|
|
@@ -95,10 +107,11 @@ double students_t_pdf(double x, double n) {
|
|
|
95
107
|
return tgamma((n + 1.0) / 2.0) / (sqrt(n * DIST_PI) * tgamma(n / 2.0)) * pow(1.0 + x * x / n, -(n + 1.0) / 2.0);
|
|
96
108
|
}
|
|
97
109
|
|
|
110
|
+
/// Returns the cumulative distribution function (CDF) of the Student's t distribution.
|
|
98
111
|
// Hill, G. W. (1970).
|
|
99
112
|
// Algorithm 395: Student's t-distribution.
|
|
100
113
|
// Communications of the ACM, 13(10), 617-619.
|
|
101
|
-
double students_t_cdf(double x, double n) {
|
|
114
|
+
static inline double students_t_cdf(double x, double n) {
|
|
102
115
|
if (n < 1) {
|
|
103
116
|
return NAN;
|
|
104
117
|
}
|
|
@@ -159,7 +172,7 @@ double students_t_cdf(double x, double n) {
|
|
|
159
172
|
return start + sign * (z - a) / 2;
|
|
160
173
|
}
|
|
161
174
|
|
|
162
|
-
// tail series
|
|
175
|
+
// tail series expansion for large t-values
|
|
163
176
|
double a = sqrt(b);
|
|
164
177
|
y = a * n;
|
|
165
178
|
int j = 0;
|
|
@@ -182,10 +195,11 @@ double students_t_cdf(double x, double n) {
|
|
|
182
195
|
return start + sign * (z - a) / 2;
|
|
183
196
|
}
|
|
184
197
|
|
|
198
|
+
/// Returns the percent-point/quantile function (PPF) of the Student's t distribution.
|
|
185
199
|
// Hill, G. W. (1970).
|
|
186
200
|
// Algorithm 396: Student's t-quantiles.
|
|
187
201
|
// Communications of the ACM, 13(10), 619-620.
|
|
188
|
-
double students_t_ppf(double p, double n) {
|
|
202
|
+
static inline double students_t_ppf(double p, double n) {
|
|
189
203
|
if (p < 0 || p > 1 || n < 1) {
|
|
190
204
|
return NAN;
|
|
191
205
|
}
|
|
@@ -234,3 +248,11 @@ double students_t_ppf(double p, double n) {
|
|
|
234
248
|
}
|
|
235
249
|
return sign * sqrt(n * y);
|
|
236
250
|
}
|
|
251
|
+
|
|
252
|
+
#ifdef __cplusplus
|
|
253
|
+
}
|
|
254
|
+
#endif
|
|
255
|
+
|
|
256
|
+
#undef DIST_E
|
|
257
|
+
#undef DIST_PI
|
|
258
|
+
#undef DIST_SQRT2
|
|
@@ -1,21 +1,27 @@
|
|
|
1
|
+
#include <cstddef>
|
|
2
|
+
#include <stdexcept>
|
|
1
3
|
#include <string>
|
|
2
4
|
#include <vector>
|
|
3
5
|
|
|
4
6
|
#include <rice/rice.hpp>
|
|
5
|
-
#include <rice/stl.hpp>
|
|
6
7
|
|
|
7
8
|
#include "anomaly_detection.hpp"
|
|
8
9
|
|
|
10
|
+
using anomaly_detection::AnomalyDetection;
|
|
11
|
+
using anomaly_detection::AnomalyDetectionParams;
|
|
9
12
|
using anomaly_detection::Direction;
|
|
10
13
|
|
|
11
14
|
extern "C"
|
|
12
15
|
void Init_ext() {
|
|
13
|
-
|
|
16
|
+
Rice::Module rb_mAnomalyDetection = Rice::define_module("AnomalyDetection");
|
|
14
17
|
|
|
15
18
|
rb_mAnomalyDetection
|
|
16
19
|
.define_singleton_function(
|
|
17
20
|
"_detect",
|
|
18
|
-
[](
|
|
21
|
+
[](Rice::Array rb_series, size_t period, float k, float alpha, Rice::String rb_direction, bool verbose) {
|
|
22
|
+
std::vector<float> series = rb_series.to_vector<float>();
|
|
23
|
+
std::string direction = rb_direction.str();
|
|
24
|
+
|
|
19
25
|
Direction dir;
|
|
20
26
|
if (direction == "pos") {
|
|
21
27
|
dir = Direction::Positive;
|
|
@@ -27,16 +33,17 @@ void Init_ext() {
|
|
|
27
33
|
throw std::invalid_argument("direction must be pos, neg, or both");
|
|
28
34
|
}
|
|
29
35
|
|
|
30
|
-
|
|
31
|
-
.
|
|
32
|
-
.
|
|
33
|
-
.direction
|
|
34
|
-
.verbose
|
|
35
|
-
.callback
|
|
36
|
-
|
|
36
|
+
AnomalyDetectionParams params{
|
|
37
|
+
.alpha = alpha,
|
|
38
|
+
.max_anoms = k,
|
|
39
|
+
.direction = dir,
|
|
40
|
+
.verbose = verbose,
|
|
41
|
+
.callback = rb_thread_check_ints
|
|
42
|
+
};
|
|
43
|
+
AnomalyDetection res{series, period, params};
|
|
37
44
|
|
|
38
|
-
|
|
39
|
-
for (auto v : res.anomalies) {
|
|
45
|
+
Rice::Array a;
|
|
46
|
+
for (const auto v : res.anomalies()) {
|
|
40
47
|
a.push(v, false);
|
|
41
48
|
}
|
|
42
49
|
return a;
|