anomaly_detection 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/ext/anomaly_detection/anomaly_detection.hpp +76 -30
- data/ext/anomaly_detection/ext.cpp +4 -1
- data/ext/anomaly_detection/stl.hpp +415 -100
- data/lib/anomaly_detection/version.rb +1 -1
- metadata +5 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8321bd70889b5f58c93c1e0830513c2984ec6512ec4e9b1418c075fe475485a6
|
|
4
|
+
data.tar.gz: e62683ce7f8eb5d7e6451252d50b226bb8d0b585c1b9778a193cb1fb8365ff0a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 48d887f12f33052e6f5c428cba99ff8a625e269591a03c51c4b3714326c32155154ec494eeefc95a48bbbe115ce14469d26c0fabfd3d1e3453aafaa19ca70a3b
|
|
7
|
+
data.tar.gz: 03bd43a3819a44218e53ffdf556c2d8d2993e3f6acb8819da1b935682b519ec2c203238e4c38f8d3c0ae7eb278d358021dae978c32b1a88aabf18de520077b55
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -20,9 +20,9 @@ Detect anomalies in a time series
|
|
|
20
20
|
|
|
21
21
|
```ruby
|
|
22
22
|
series = {
|
|
23
|
-
Date.parse("
|
|
24
|
-
Date.parse("
|
|
25
|
-
Date.parse("
|
|
23
|
+
Date.parse("2025-01-01") => 100,
|
|
24
|
+
Date.parse("2025-01-02") => 150,
|
|
25
|
+
Date.parse("2025-01-03") => 136,
|
|
26
26
|
# ...
|
|
27
27
|
}
|
|
28
28
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*!
|
|
2
|
-
* AnomalyDetection.cpp v0.1
|
|
2
|
+
* AnomalyDetection.cpp v0.2.1
|
|
3
3
|
* https://github.com/ankane/AnomalyDetection.cpp
|
|
4
4
|
* GPL-3.0-or-later License
|
|
5
5
|
*/
|
|
@@ -12,35 +12,53 @@
|
|
|
12
12
|
#include <numeric>
|
|
13
13
|
#include <vector>
|
|
14
14
|
|
|
15
|
+
#if __cplusplus >= 202002L
|
|
16
|
+
#include <span>
|
|
17
|
+
#endif
|
|
18
|
+
|
|
15
19
|
#include "dist.h"
|
|
16
20
|
#include "stl.hpp"
|
|
17
21
|
|
|
18
22
|
namespace anomaly_detection {
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
/// The direction to detect anomalies.
|
|
25
|
+
enum class Direction {
|
|
26
|
+
/// Positive direction.
|
|
27
|
+
Positive,
|
|
28
|
+
/// Negative direction.
|
|
29
|
+
Negative,
|
|
30
|
+
/// Both directions.
|
|
31
|
+
Both
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
namespace {
|
|
21
35
|
|
|
22
|
-
|
|
36
|
+
template<typename T>
|
|
37
|
+
T median_sorted(const std::vector<T>& sorted) {
|
|
23
38
|
return (sorted[(sorted.size() - 1) / 2] + sorted[sorted.size() / 2]) / 2.0;
|
|
24
39
|
}
|
|
25
40
|
|
|
26
|
-
|
|
27
|
-
|
|
41
|
+
template<typename T>
|
|
42
|
+
T median(const T* data, size_t data_size) {
|
|
43
|
+
std::vector<T> sorted(data, data + data_size);
|
|
28
44
|
std::sort(sorted.begin(), sorted.end());
|
|
29
45
|
return median_sorted(sorted);
|
|
30
46
|
}
|
|
31
47
|
|
|
32
|
-
|
|
33
|
-
|
|
48
|
+
template<typename T>
|
|
49
|
+
T mad(const std::vector<T>& data, T med) {
|
|
50
|
+
std::vector<T> res;
|
|
34
51
|
res.reserve(data.size());
|
|
35
52
|
for (auto v : data) {
|
|
36
|
-
res.push_back(
|
|
53
|
+
res.push_back(std::abs(v - med));
|
|
37
54
|
}
|
|
38
55
|
std::sort(res.begin(), res.end());
|
|
39
56
|
return 1.4826 * median_sorted(res);
|
|
40
57
|
}
|
|
41
58
|
|
|
42
|
-
|
|
43
|
-
|
|
59
|
+
template<typename T>
|
|
60
|
+
std::vector<size_t> detect_anoms(const T* data, size_t data_size, size_t num_obs_per_period, float k, float alpha, bool one_tail, bool upper_tail, bool verbose, std::function<void()> callback) {
|
|
61
|
+
auto n = data_size;
|
|
44
62
|
|
|
45
63
|
// Check to make sure we have at least two periods worth of data for anomaly context
|
|
46
64
|
if (n < num_obs_per_period * 2) {
|
|
@@ -48,18 +66,20 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
48
66
|
}
|
|
49
67
|
|
|
50
68
|
// Handle NANs
|
|
51
|
-
auto nan = std::count_if(data
|
|
69
|
+
auto nan = std::count_if(data, data + data_size, [](const auto& value) {
|
|
70
|
+
return std::isnan(value);
|
|
71
|
+
});
|
|
52
72
|
if (nan > 0) {
|
|
53
73
|
throw std::invalid_argument("series contains NANs");
|
|
54
74
|
}
|
|
55
75
|
|
|
56
|
-
std::vector<
|
|
76
|
+
std::vector<T> data2;
|
|
57
77
|
data2.reserve(n);
|
|
58
|
-
auto med = median(data);
|
|
78
|
+
auto med = median(data, data_size);
|
|
59
79
|
|
|
60
80
|
if (num_obs_per_period > 1) {
|
|
61
81
|
// Decompose data. This returns a univariate remainder which will be used for anomaly detection. Optionally, we might NOT decompose.
|
|
62
|
-
auto data_decomp = stl::params().robust(true).seasonal_length(
|
|
82
|
+
auto data_decomp = stl::params().robust(true).seasonal_length(data_size * 10 + 1).fit(data, data_size, num_obs_per_period);
|
|
63
83
|
auto seasonal = data_decomp.seasonal;
|
|
64
84
|
|
|
65
85
|
for (size_t i = 0; i < n; i++) {
|
|
@@ -80,7 +100,9 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
80
100
|
// Use stable sort for indexes for deterministic results
|
|
81
101
|
std::vector<size_t> indexes(n);
|
|
82
102
|
std::iota(indexes.begin(), indexes.end(), 0);
|
|
83
|
-
std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) {
|
|
103
|
+
std::stable_sort(indexes.begin(), indexes.end(), [&data2](size_t a, size_t b) {
|
|
104
|
+
return data2[a] < data2[b];
|
|
105
|
+
});
|
|
84
106
|
std::sort(data2.begin(), data2.end());
|
|
85
107
|
|
|
86
108
|
// Compute test statistic until r=max_outliers values have been removed from the sample
|
|
@@ -91,7 +113,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
91
113
|
|
|
92
114
|
// TODO Improve performance between loop iterations
|
|
93
115
|
auto ma = median_sorted(data2);
|
|
94
|
-
std::vector<
|
|
116
|
+
std::vector<T> ares;
|
|
95
117
|
ares.reserve(data2.size());
|
|
96
118
|
if (one_tail) {
|
|
97
119
|
if (upper_tail) {
|
|
@@ -105,7 +127,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
105
127
|
}
|
|
106
128
|
} else {
|
|
107
129
|
for (auto v : data2) {
|
|
108
|
-
ares.push_back(
|
|
130
|
+
ares.push_back(std::abs(v - ma));
|
|
109
131
|
}
|
|
110
132
|
}
|
|
111
133
|
|
|
@@ -126,7 +148,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
126
148
|
indexes.erase(indexes.begin() + r_idx_i);
|
|
127
149
|
|
|
128
150
|
// Compute critical value
|
|
129
|
-
|
|
151
|
+
double p;
|
|
130
152
|
if (one_tail) {
|
|
131
153
|
p = 1.0 - alpha / (n - i + 1);
|
|
132
154
|
} else {
|
|
@@ -134,7 +156,7 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
134
156
|
}
|
|
135
157
|
|
|
136
158
|
auto t = students_t_ppf(p, n - i - 1);
|
|
137
|
-
auto lam = t * (n - i) / sqrt(((n - i - 1) + t * t) * (n - i + 1));
|
|
159
|
+
auto lam = t * (n - i) / std::sqrt(((n - i - 1) + t * t) * (n - i + 1));
|
|
138
160
|
|
|
139
161
|
if (r > lam) {
|
|
140
162
|
num_anoms = i;
|
|
@@ -153,11 +175,16 @@ std::vector<size_t> detect_anoms(const std::vector<float>& data, size_t num_obs_
|
|
|
153
175
|
return anomalies;
|
|
154
176
|
}
|
|
155
177
|
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/// An anomaly detection result.
|
|
156
181
|
class AnomalyDetectionResult {
|
|
157
182
|
public:
|
|
183
|
+
/// Returns the anomalies.
|
|
158
184
|
std::vector<size_t> anomalies;
|
|
159
185
|
};
|
|
160
186
|
|
|
187
|
+
/// A set of anomaly detection parameters.
|
|
161
188
|
class AnomalyDetectionParams {
|
|
162
189
|
float alpha_ = 0.05;
|
|
163
190
|
float max_anoms_ = 0.1;
|
|
@@ -166,45 +193,64 @@ class AnomalyDetectionParams {
|
|
|
166
193
|
std::function<void()> callback_ = nullptr;
|
|
167
194
|
|
|
168
195
|
public:
|
|
196
|
+
/// Sets the level of statistical significance.
|
|
169
197
|
inline AnomalyDetectionParams alpha(float alpha) {
|
|
170
198
|
this->alpha_ = alpha;
|
|
171
199
|
return *this;
|
|
172
200
|
};
|
|
173
201
|
|
|
202
|
+
/// Sets the maximum number of anomalies as percent of data.
|
|
174
203
|
inline AnomalyDetectionParams max_anoms(float max_anoms) {
|
|
175
204
|
this->max_anoms_ = max_anoms;
|
|
176
205
|
return *this;
|
|
177
206
|
};
|
|
178
207
|
|
|
208
|
+
/// Sets the direction.
|
|
179
209
|
inline AnomalyDetectionParams direction(Direction direction) {
|
|
180
210
|
this->direction_ = direction;
|
|
181
211
|
return *this;
|
|
182
212
|
};
|
|
183
213
|
|
|
214
|
+
/// Sets whether to show progress.
|
|
184
215
|
inline AnomalyDetectionParams verbose(bool verbose) {
|
|
185
216
|
this->verbose_ = verbose;
|
|
186
217
|
return *this;
|
|
187
218
|
};
|
|
188
219
|
|
|
220
|
+
/// Sets a callback for each iteration.
|
|
189
221
|
inline AnomalyDetectionParams callback(std::function<void()> callback) {
|
|
190
222
|
this->callback_ = callback;
|
|
191
223
|
return *this;
|
|
192
224
|
};
|
|
193
225
|
|
|
194
|
-
|
|
195
|
-
|
|
226
|
+
/// Detects anomalies in a time series from an array.
|
|
227
|
+
template<typename T>
|
|
228
|
+
inline AnomalyDetectionResult fit(const T* series, size_t series_size, size_t period) const {
|
|
229
|
+
bool one_tail = this->direction_ != Direction::Both;
|
|
230
|
+
bool upper_tail = this->direction_ == Direction::Positive;
|
|
196
231
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
}
|
|
232
|
+
auto anomalies = detect_anoms(series, series_size, period, this->max_anoms_, this->alpha_, one_tail, upper_tail, this->verbose_, this->callback_);
|
|
233
|
+
return AnomalyDetectionResult { anomalies };
|
|
234
|
+
}
|
|
200
235
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
236
|
+
/// Detects anomalies in a time series from a vector.
|
|
237
|
+
template<typename T>
|
|
238
|
+
inline AnomalyDetectionResult fit(const std::vector<T>& series, size_t period) const {
|
|
239
|
+
return fit(series.data(), series.size(), period);
|
|
240
|
+
}
|
|
204
241
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
242
|
+
#if __cplusplus >= 202002L
|
|
243
|
+
/// Detects anomalies in a time series from a span.
|
|
244
|
+
template<typename T>
|
|
245
|
+
inline AnomalyDetectionResult fit(std::span<const T> series, size_t period) const {
|
|
246
|
+
return fit(series.data(), series.size(), period);
|
|
247
|
+
}
|
|
248
|
+
#endif
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
/// Creates a new set of parameters.
|
|
252
|
+
inline AnomalyDetectionParams params() {
|
|
253
|
+
return AnomalyDetectionParams();
|
|
208
254
|
}
|
|
209
255
|
|
|
210
256
|
}
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
#include <string>
|
|
2
|
+
#include <vector>
|
|
3
|
+
|
|
1
4
|
#include <rice/rice.hpp>
|
|
2
5
|
#include <rice/stl.hpp>
|
|
3
6
|
|
|
@@ -34,7 +37,7 @@ void Init_ext() {
|
|
|
34
37
|
|
|
35
38
|
auto a = Rice::Array();
|
|
36
39
|
for (auto v : res.anomalies) {
|
|
37
|
-
a.push(v);
|
|
40
|
+
a.push(v, false);
|
|
38
41
|
}
|
|
39
42
|
return a;
|
|
40
43
|
});
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*!
|
|
2
|
-
* STL C++ v0.
|
|
2
|
+
* STL C++ v0.2.0
|
|
3
3
|
* https://github.com/ankane/stl-cpp
|
|
4
4
|
* Unlicense OR MIT License
|
|
5
5
|
*
|
|
@@ -8,6 +8,10 @@
|
|
|
8
8
|
* Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990).
|
|
9
9
|
* STL: A Seasonal-Trend Decomposition Procedure Based on Loess.
|
|
10
10
|
* Journal of Official Statistics, 6(1), 3-33.
|
|
11
|
+
*
|
|
12
|
+
* Bandara, K., Hyndman, R. J., & Bergmeir, C. (2021).
|
|
13
|
+
* MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple Seasonal Patterns.
|
|
14
|
+
* arXiv:2107.13462 [stat.AP]. https://doi.org/10.48550/arXiv.2107.13462
|
|
11
15
|
*/
|
|
12
16
|
|
|
13
17
|
#pragma once
|
|
@@ -17,16 +21,24 @@
|
|
|
17
21
|
#include <numeric>
|
|
18
22
|
#include <optional>
|
|
19
23
|
#include <stdexcept>
|
|
24
|
+
#include <tuple>
|
|
20
25
|
#include <vector>
|
|
21
26
|
|
|
27
|
+
#if __cplusplus >= 202002L
|
|
28
|
+
#include <span>
|
|
29
|
+
#endif
|
|
30
|
+
|
|
22
31
|
namespace stl {
|
|
23
32
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
33
|
+
namespace {
|
|
34
|
+
|
|
35
|
+
template<typename T>
|
|
36
|
+
bool est(const T* y, size_t n, size_t len, int ideg, T xs, T* ys, size_t nleft, size_t nright, T* w, bool userw, const T* rw) {
|
|
37
|
+
auto range = ((T) n) - 1.0;
|
|
38
|
+
auto h = std::max(xs - ((T) nleft), ((T) nright) - xs);
|
|
27
39
|
|
|
28
40
|
if (len > n) {
|
|
29
|
-
h += (
|
|
41
|
+
h += (T) ((len - n) / 2);
|
|
30
42
|
}
|
|
31
43
|
|
|
32
44
|
auto h9 = 0.999 * h;
|
|
@@ -36,12 +48,12 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
|
|
|
36
48
|
auto a = 0.0;
|
|
37
49
|
for (auto j = nleft; j <= nright; j++) {
|
|
38
50
|
w[j - 1] = 0.0;
|
|
39
|
-
auto r =
|
|
51
|
+
auto r = std::abs(((T) j) - xs);
|
|
40
52
|
if (r <= h9) {
|
|
41
53
|
if (r <= h1) {
|
|
42
54
|
w[j - 1] = 1.0;
|
|
43
55
|
} else {
|
|
44
|
-
w[j - 1] = pow(1.0 - pow(r / h, 3), 3);
|
|
56
|
+
w[j - 1] = (T) std::pow(1.0 - std::pow(r / h, 3), 3);
|
|
45
57
|
}
|
|
46
58
|
if (userw) {
|
|
47
59
|
w[j - 1] *= rw[j - 1];
|
|
@@ -54,25 +66,25 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
|
|
|
54
66
|
return false;
|
|
55
67
|
} else { // weighted least squares
|
|
56
68
|
for (auto j = nleft; j <= nright; j++) { // make sum of w(j) == 1
|
|
57
|
-
w[j - 1] /= a;
|
|
69
|
+
w[j - 1] /= (T) a;
|
|
58
70
|
}
|
|
59
71
|
|
|
60
72
|
if (h > 0.0 && ideg > 0) { // use linear fit
|
|
61
73
|
auto a = 0.0;
|
|
62
74
|
for (auto j = nleft; j <= nright; j++) { // weighted center of x values
|
|
63
|
-
a += w[j - 1] * ((
|
|
75
|
+
a += w[j - 1] * ((T) j);
|
|
64
76
|
}
|
|
65
77
|
auto b = xs - a;
|
|
66
78
|
auto c = 0.0;
|
|
67
79
|
for (auto j = nleft; j <= nright; j++) {
|
|
68
|
-
c += w[j - 1] * pow(((
|
|
80
|
+
c += w[j - 1] * std::pow(((T) j) - a, 2);
|
|
69
81
|
}
|
|
70
|
-
if (sqrt(c) > 0.001 * range) {
|
|
82
|
+
if (std::sqrt(c) > 0.001 * range) {
|
|
71
83
|
b /= c;
|
|
72
84
|
|
|
73
85
|
// points are spread out enough to compute slope
|
|
74
86
|
for (auto j = nleft; j <= nright; j++) {
|
|
75
|
-
w[j - 1] *= b * (((
|
|
87
|
+
w[j - 1] *= (T) (b * (((T) j) - a) + 1.0);
|
|
76
88
|
}
|
|
77
89
|
}
|
|
78
90
|
}
|
|
@@ -86,7 +98,8 @@ bool est(const float* y, size_t n, size_t len, int ideg, float xs, float* ys, si
|
|
|
86
98
|
}
|
|
87
99
|
}
|
|
88
100
|
|
|
89
|
-
|
|
101
|
+
template<typename T>
|
|
102
|
+
void ess(const T* y, size_t n, size_t len, int ideg, size_t njump, bool userw, const T* rw, T* ys, T* res) {
|
|
90
103
|
if (n < 2) {
|
|
91
104
|
ys[0] = y[0];
|
|
92
105
|
return;
|
|
@@ -100,7 +113,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
|
100
113
|
nleft = 1;
|
|
101
114
|
nright = n;
|
|
102
115
|
for (size_t i = 1; i <= n; i += newnj) {
|
|
103
|
-
auto ok = est(y, n, len, ideg, (
|
|
116
|
+
auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
|
104
117
|
if (!ok) {
|
|
105
118
|
ys[i - 1] = y[i - 1];
|
|
106
119
|
}
|
|
@@ -114,7 +127,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
|
114
127
|
nleft += 1;
|
|
115
128
|
nright += 1;
|
|
116
129
|
}
|
|
117
|
-
auto ok = est(y, n, len, ideg, (
|
|
130
|
+
auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
|
118
131
|
if (!ok) {
|
|
119
132
|
ys[i - 1] = y[i - 1];
|
|
120
133
|
}
|
|
@@ -132,7 +145,7 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
|
132
145
|
nleft = i - nsh + 1;
|
|
133
146
|
nright = len + i - nsh;
|
|
134
147
|
}
|
|
135
|
-
auto ok = est(y, n, len, ideg, (
|
|
148
|
+
auto ok = est(y, n, len, ideg, (T) i, &ys[i - 1], nleft, nright, res, userw, rw);
|
|
136
149
|
if (!ok) {
|
|
137
150
|
ys[i - 1] = y[i - 1];
|
|
138
151
|
}
|
|
@@ -141,60 +154,63 @@ void ess(const float* y, size_t n, size_t len, int ideg, size_t njump, bool user
|
|
|
141
154
|
|
|
142
155
|
if (newnj != 1) {
|
|
143
156
|
for (size_t i = 1; i <= n - newnj; i += newnj) {
|
|
144
|
-
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((
|
|
157
|
+
auto delta = (ys[i + newnj - 1] - ys[i - 1]) / ((T) newnj);
|
|
145
158
|
for (auto j = i + 1; j <= i + newnj - 1; j++) {
|
|
146
|
-
ys[j - 1] = ys[i - 1] + delta * ((
|
|
159
|
+
ys[j - 1] = ys[i - 1] + delta * ((T) (j - i));
|
|
147
160
|
}
|
|
148
161
|
}
|
|
149
162
|
auto k = ((n - 1) / newnj) * newnj + 1;
|
|
150
163
|
if (k != n) {
|
|
151
|
-
auto ok = est(y, n, len, ideg, (
|
|
164
|
+
auto ok = est(y, n, len, ideg, (T) n, &ys[n - 1], nleft, nright, res, userw, rw);
|
|
152
165
|
if (!ok) {
|
|
153
166
|
ys[n - 1] = y[n - 1];
|
|
154
167
|
}
|
|
155
168
|
if (k != n - 1) {
|
|
156
|
-
auto delta = (ys[n - 1] - ys[k - 1]) / ((
|
|
169
|
+
auto delta = (ys[n - 1] - ys[k - 1]) / ((T) (n - k));
|
|
157
170
|
for (auto j = k + 1; j <= n - 1; j++) {
|
|
158
|
-
ys[j - 1] = ys[k - 1] + delta * ((
|
|
171
|
+
ys[j - 1] = ys[k - 1] + delta * ((T) (j - k));
|
|
159
172
|
}
|
|
160
173
|
}
|
|
161
174
|
}
|
|
162
175
|
}
|
|
163
176
|
}
|
|
164
177
|
|
|
165
|
-
|
|
178
|
+
template<typename T>
|
|
179
|
+
void ma(const T* x, size_t n, size_t len, T* ave) {
|
|
166
180
|
auto newn = n - len + 1;
|
|
167
|
-
|
|
168
|
-
|
|
181
|
+
double flen = (T) len;
|
|
182
|
+
double v = 0.0;
|
|
169
183
|
|
|
170
184
|
// get the first average
|
|
171
185
|
for (size_t i = 0; i < len; i++) {
|
|
172
186
|
v += x[i];
|
|
173
187
|
}
|
|
174
188
|
|
|
175
|
-
ave[0] = v / flen;
|
|
189
|
+
ave[0] = (T) (v / flen);
|
|
176
190
|
if (newn > 1) {
|
|
177
|
-
|
|
178
|
-
|
|
191
|
+
size_t k = len;
|
|
192
|
+
size_t m = 0;
|
|
179
193
|
for (size_t j = 1; j < newn; j++) {
|
|
180
194
|
// window down the array
|
|
181
195
|
v = v - x[m] + x[k];
|
|
182
|
-
ave[j] = v / flen;
|
|
196
|
+
ave[j] = (T) (v / flen);
|
|
183
197
|
k += 1;
|
|
184
198
|
m += 1;
|
|
185
199
|
}
|
|
186
200
|
}
|
|
187
201
|
}
|
|
188
202
|
|
|
189
|
-
|
|
203
|
+
template<typename T>
|
|
204
|
+
void fts(const T* x, size_t n, size_t np, T* trend, T* work) {
|
|
190
205
|
ma(x, n, np, trend);
|
|
191
206
|
ma(trend, n - np + 1, np, work);
|
|
192
207
|
ma(work, n - 2 * np + 2, 3, trend);
|
|
193
208
|
}
|
|
194
209
|
|
|
195
|
-
|
|
210
|
+
template<typename T>
|
|
211
|
+
void rwts(const T* y, size_t n, const T* fit, T* rw) {
|
|
196
212
|
for (size_t i = 0; i < n; i++) {
|
|
197
|
-
rw[i] =
|
|
213
|
+
rw[i] = std::abs(y[i] - fit[i]);
|
|
198
214
|
}
|
|
199
215
|
|
|
200
216
|
auto mid1 = (n - 1) / 2;
|
|
@@ -208,18 +224,19 @@ void rwts(const float* y, size_t n, const float* fit, float* rw) {
|
|
|
208
224
|
auto c1 = 0.001 * cmad;
|
|
209
225
|
|
|
210
226
|
for (size_t i = 0; i < n; i++) {
|
|
211
|
-
auto r =
|
|
227
|
+
auto r = std::abs(y[i] - fit[i]);
|
|
212
228
|
if (r <= c1) {
|
|
213
229
|
rw[i] = 1.0;
|
|
214
230
|
} else if (r <= c9) {
|
|
215
|
-
rw[i] = pow(1.0 - pow(r / cmad, 2), 2);
|
|
231
|
+
rw[i] = (T) std::pow(1.0 - std::pow(r / cmad, 2), 2);
|
|
216
232
|
} else {
|
|
217
233
|
rw[i] = 0.0;
|
|
218
234
|
}
|
|
219
235
|
}
|
|
220
236
|
}
|
|
221
237
|
|
|
222
|
-
|
|
238
|
+
template<typename T>
|
|
239
|
+
void ss(const T* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump, bool userw, T* rw, T* season, T* work1, T* work2, T* work3, T* work4) {
|
|
223
240
|
for (size_t j = 1; j <= np; j++) {
|
|
224
241
|
size_t k = (n - j) / np + 1;
|
|
225
242
|
|
|
@@ -232,14 +249,14 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
|
232
249
|
}
|
|
233
250
|
}
|
|
234
251
|
ess(work1, k, ns, isdeg, nsjump, userw, work3, work2 + 1, work4);
|
|
235
|
-
|
|
252
|
+
T xs = 0.0;
|
|
236
253
|
auto nright = std::min(ns, k);
|
|
237
254
|
auto ok = est(work1, k, ns, isdeg, xs, &work2[0], 1, nright, work4, userw, work3);
|
|
238
255
|
if (!ok) {
|
|
239
256
|
work2[0] = work2[1];
|
|
240
257
|
}
|
|
241
258
|
xs = k + 1;
|
|
242
|
-
size_t nleft = std::max(1, (int) k - (int) ns + 1);
|
|
259
|
+
size_t nleft = (size_t) std::max(1, (int) k - (int) ns + 1);
|
|
243
260
|
ok = est(work1, k, ns, isdeg, xs, &work2[k + 1], nleft, k, work4, userw, work3);
|
|
244
261
|
if (!ok) {
|
|
245
262
|
work2[k + 1] = work2[k];
|
|
@@ -250,7 +267,8 @@ void ss(const float* y, size_t n, size_t np, size_t ns, int isdeg, size_t nsjump
|
|
|
250
267
|
}
|
|
251
268
|
}
|
|
252
269
|
|
|
253
|
-
|
|
270
|
+
template<typename T>
|
|
271
|
+
void onestp(const T* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, bool userw, T* rw, T* season, T* trend, T* work1, T* work2, T* work3, T* work4, T* work5) {
|
|
254
272
|
for (size_t j = 0; j < ni; j++) {
|
|
255
273
|
for (size_t i = 0; i < n; i++) {
|
|
256
274
|
work1[i] = y[i] - trend[i];
|
|
@@ -269,7 +287,8 @@ void onestp(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl
|
|
|
269
287
|
}
|
|
270
288
|
}
|
|
271
289
|
|
|
272
|
-
|
|
290
|
+
template<typename T>
|
|
291
|
+
void stl(const T* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, int isdeg, int itdeg, int ildeg, size_t nsjump, size_t ntjump, size_t nljump, size_t ni, size_t no, T* rw, T* season, T* trend) {
|
|
273
292
|
if (ns < 3) {
|
|
274
293
|
throw std::invalid_argument("seasonal_length must be at least 3");
|
|
275
294
|
}
|
|
@@ -303,11 +322,11 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
|
303
322
|
throw std::invalid_argument("low_pass_length must be odd");
|
|
304
323
|
}
|
|
305
324
|
|
|
306
|
-
auto work1 = std::vector<
|
|
307
|
-
auto work2 = std::vector<
|
|
308
|
-
auto work3 = std::vector<
|
|
309
|
-
auto work4 = std::vector<
|
|
310
|
-
auto work5 = std::vector<
|
|
325
|
+
auto work1 = std::vector<T>(n + 2 * np);
|
|
326
|
+
auto work2 = std::vector<T>(n + 2 * np);
|
|
327
|
+
auto work3 = std::vector<T>(n + 2 * np);
|
|
328
|
+
auto work4 = std::vector<T>(n + 2 * np);
|
|
329
|
+
auto work5 = std::vector<T>(n + 2 * np);
|
|
311
330
|
|
|
312
331
|
auto userw = false;
|
|
313
332
|
size_t k = 0;
|
|
@@ -332,44 +351,62 @@ void stl(const float* y, size_t n, size_t np, size_t ns, size_t nt, size_t nl, i
|
|
|
332
351
|
}
|
|
333
352
|
}
|
|
334
353
|
|
|
335
|
-
|
|
354
|
+
template<typename T>
|
|
355
|
+
double var(const std::vector<T>& series) {
|
|
336
356
|
auto mean = std::accumulate(series.begin(), series.end(), 0.0) / series.size();
|
|
337
|
-
|
|
338
|
-
tmp.reserve(series.size());
|
|
357
|
+
double sum = 0.0;
|
|
339
358
|
for (auto v : series) {
|
|
340
|
-
|
|
359
|
+
double diff = v - mean;
|
|
360
|
+
sum += diff * diff;
|
|
361
|
+
}
|
|
362
|
+
return sum / (series.size() - 1);
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
template<typename T>
|
|
366
|
+
double strength(const std::vector<T>& component, const std::vector<T>& remainder) {
|
|
367
|
+
std::vector<T> sr;
|
|
368
|
+
sr.reserve(remainder.size());
|
|
369
|
+
for (size_t i = 0; i < remainder.size(); i++) {
|
|
370
|
+
sr.push_back(component[i] + remainder[i]);
|
|
341
371
|
}
|
|
342
|
-
return std::
|
|
372
|
+
return std::max(0.0, 1.0 - var(remainder) / var(sr));
|
|
343
373
|
}
|
|
344
374
|
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/// An STL result.
|
|
378
|
+
template<typename T = float>
|
|
345
379
|
class StlResult {
|
|
346
380
|
public:
|
|
347
|
-
|
|
348
|
-
std::vector<
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
381
|
+
/// Returns the seasonal component.
|
|
382
|
+
std::vector<T> seasonal;
|
|
383
|
+
|
|
384
|
+
/// Returns the trend component.
|
|
385
|
+
std::vector<T> trend;
|
|
386
|
+
|
|
387
|
+
/// Returns the remainder.
|
|
388
|
+
std::vector<T> remainder;
|
|
389
|
+
|
|
390
|
+
/// Returns the weights.
|
|
391
|
+
std::vector<T> weights;
|
|
392
|
+
|
|
393
|
+
/// Returns the seasonal strength.
|
|
394
|
+
inline double seasonal_strength() const {
|
|
395
|
+
return strength(seasonal, remainder);
|
|
359
396
|
}
|
|
360
397
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
for (size_t i = 0; i < remainder.size(); i++) {
|
|
365
|
-
tr.push_back(trend[i] + remainder[i]);
|
|
366
|
-
}
|
|
367
|
-
return std::max(0.0, 1.0 - var(remainder) / var(tr));
|
|
398
|
+
/// Returns the trend strength.
|
|
399
|
+
inline double trend_strength() const {
|
|
400
|
+
return strength(trend, remainder);
|
|
368
401
|
}
|
|
369
402
|
};
|
|
370
403
|
|
|
404
|
+
/// A set of STL parameters.
|
|
371
405
|
class StlParams {
|
|
406
|
+
public:
|
|
407
|
+
/// @private
|
|
372
408
|
std::optional<size_t> ns_ = std::nullopt;
|
|
409
|
+
private:
|
|
373
410
|
std::optional<size_t> nt_ = std::nullopt;
|
|
374
411
|
std::optional<size_t> nl_ = std::nullopt;
|
|
375
412
|
int isdeg_ = 0;
|
|
@@ -383,75 +420,104 @@ class StlParams {
|
|
|
383
420
|
bool robust_ = false;
|
|
384
421
|
|
|
385
422
|
public:
|
|
386
|
-
|
|
387
|
-
|
|
423
|
+
/// Sets the length of the seasonal smoother.
|
|
424
|
+
inline StlParams seasonal_length(size_t length) {
|
|
425
|
+
this->ns_ = length;
|
|
388
426
|
return *this;
|
|
389
427
|
}
|
|
390
428
|
|
|
391
|
-
|
|
392
|
-
|
|
429
|
+
/// Sets the length of the trend smoother.
|
|
430
|
+
inline StlParams trend_length(size_t length) {
|
|
431
|
+
this->nt_ = length;
|
|
393
432
|
return *this;
|
|
394
433
|
}
|
|
395
434
|
|
|
396
|
-
|
|
397
|
-
|
|
435
|
+
/// Sets the length of the low-pass filter.
|
|
436
|
+
inline StlParams low_pass_length(size_t length) {
|
|
437
|
+
this->nl_ = length;
|
|
398
438
|
return *this;
|
|
399
439
|
}
|
|
400
440
|
|
|
401
|
-
|
|
402
|
-
|
|
441
|
+
/// Sets the degree of locally-fitted polynomial in seasonal smoothing.
|
|
442
|
+
inline StlParams seasonal_degree(int degree) {
|
|
443
|
+
this->isdeg_ = degree;
|
|
403
444
|
return *this;
|
|
404
445
|
}
|
|
405
446
|
|
|
406
|
-
|
|
407
|
-
|
|
447
|
+
/// Sets the degree of locally-fitted polynomial in trend smoothing.
|
|
448
|
+
inline StlParams trend_degree(int degree) {
|
|
449
|
+
this->itdeg_ = degree;
|
|
408
450
|
return *this;
|
|
409
451
|
}
|
|
410
452
|
|
|
411
|
-
|
|
412
|
-
|
|
453
|
+
/// Sets the degree of locally-fitted polynomial in low-pass smoothing.
|
|
454
|
+
inline StlParams low_pass_degree(int degree) {
|
|
455
|
+
this->ildeg_ = degree;
|
|
413
456
|
return *this;
|
|
414
457
|
}
|
|
415
458
|
|
|
416
|
-
|
|
417
|
-
|
|
459
|
+
/// Sets the skipping value for seasonal smoothing.
|
|
460
|
+
inline StlParams seasonal_jump(size_t jump) {
|
|
461
|
+
this->nsjump_ = jump;
|
|
418
462
|
return *this;
|
|
419
463
|
}
|
|
420
464
|
|
|
421
|
-
|
|
422
|
-
|
|
465
|
+
/// Sets the skipping value for trend smoothing.
|
|
466
|
+
inline StlParams trend_jump(size_t jump) {
|
|
467
|
+
this->ntjump_ = jump;
|
|
423
468
|
return *this;
|
|
424
469
|
}
|
|
425
470
|
|
|
426
|
-
|
|
427
|
-
|
|
471
|
+
/// Sets the skipping value for low-pass smoothing.
|
|
472
|
+
inline StlParams low_pass_jump(size_t jump) {
|
|
473
|
+
this->nljump_ = jump;
|
|
428
474
|
return *this;
|
|
429
475
|
}
|
|
430
476
|
|
|
431
|
-
|
|
432
|
-
|
|
477
|
+
/// Sets the number of loops for updating the seasonal and trend components.
|
|
478
|
+
inline StlParams inner_loops(size_t loops) {
|
|
479
|
+
this->ni_ = loops;
|
|
433
480
|
return *this;
|
|
434
481
|
}
|
|
435
482
|
|
|
436
|
-
|
|
437
|
-
|
|
483
|
+
/// Sets the number of iterations of robust fitting.
|
|
484
|
+
inline StlParams outer_loops(size_t loops) {
|
|
485
|
+
this->no_ = loops;
|
|
438
486
|
return *this;
|
|
439
487
|
}
|
|
440
488
|
|
|
489
|
+
/// Sets whether robustness iterations are to be used.
|
|
441
490
|
inline StlParams robust(bool robust) {
|
|
442
491
|
this->robust_ = robust;
|
|
443
492
|
return *this;
|
|
444
493
|
}
|
|
445
494
|
|
|
446
|
-
|
|
447
|
-
|
|
495
|
+
/// Decomposes a time series from an array.
|
|
496
|
+
template<typename T>
|
|
497
|
+
StlResult<T> fit(const T* series, size_t series_size, size_t period) const;
|
|
498
|
+
|
|
499
|
+
/// Decomposes a time series from a vector.
|
|
500
|
+
template<typename T>
|
|
501
|
+
StlResult<T> fit(const std::vector<T>& series, size_t period) const;
|
|
502
|
+
|
|
503
|
+
#if __cplusplus >= 202002L
|
|
504
|
+
/// Decomposes a time series from a span.
|
|
505
|
+
template<typename T>
|
|
506
|
+
StlResult<T> fit(std::span<const T> series, size_t period) const;
|
|
507
|
+
#endif
|
|
448
508
|
};
|
|
449
509
|
|
|
450
|
-
|
|
510
|
+
/// Creates a new set of STL parameters.
|
|
511
|
+
inline StlParams params() {
|
|
451
512
|
return StlParams();
|
|
452
513
|
}
|
|
453
514
|
|
|
454
|
-
|
|
515
|
+
template<typename T>
|
|
516
|
+
StlResult<T> StlParams::fit(const T* series, size_t series_size, size_t period) const {
|
|
517
|
+
auto y = series;
|
|
518
|
+
auto np = period;
|
|
519
|
+
auto n = series_size;
|
|
520
|
+
|
|
455
521
|
if (n < 2 * np) {
|
|
456
522
|
throw std::invalid_argument("series has less than two periods");
|
|
457
523
|
}
|
|
@@ -461,11 +527,11 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
|
461
527
|
auto isdeg = this->isdeg_;
|
|
462
528
|
auto itdeg = this->itdeg_;
|
|
463
529
|
|
|
464
|
-
auto res = StlResult {
|
|
465
|
-
std::vector<
|
|
466
|
-
std::vector<
|
|
467
|
-
std::vector<
|
|
468
|
-
std::vector<
|
|
530
|
+
auto res = StlResult<T> {
|
|
531
|
+
std::vector<T>(n),
|
|
532
|
+
std::vector<T>(n),
|
|
533
|
+
std::vector<T>(),
|
|
534
|
+
std::vector<T>(n)
|
|
469
535
|
};
|
|
470
536
|
|
|
471
537
|
auto ildeg = this->ildeg_.value_or(itdeg);
|
|
@@ -504,8 +570,257 @@ StlResult StlParams::fit(const float* y, size_t n, size_t np) {
|
|
|
504
570
|
return res;
|
|
505
571
|
}
|
|
506
572
|
|
|
507
|
-
|
|
508
|
-
|
|
573
|
+
template<typename T>
|
|
574
|
+
StlResult<T> StlParams::fit(const std::vector<T>& series, size_t period) const {
|
|
575
|
+
return StlParams::fit(series.data(), series.size(), period);
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
#if __cplusplus >= 202002L
|
|
579
|
+
template<typename T>
|
|
580
|
+
StlResult<T> StlParams::fit(std::span<const T> series, size_t period) const {
|
|
581
|
+
return StlParams::fit(series.data(), series.size(), period);
|
|
582
|
+
}
|
|
583
|
+
#endif
|
|
584
|
+
|
|
585
|
+
/// An MSTL result.
|
|
586
|
+
template<typename T = float>
|
|
587
|
+
class MstlResult {
|
|
588
|
+
public:
|
|
589
|
+
/// Returns the seasonal component.
|
|
590
|
+
std::vector<std::vector<T>> seasonal;
|
|
591
|
+
|
|
592
|
+
/// Returns the trend component.
|
|
593
|
+
std::vector<T> trend;
|
|
594
|
+
|
|
595
|
+
/// Returns the remainder.
|
|
596
|
+
std::vector<T> remainder;
|
|
597
|
+
|
|
598
|
+
/// Returns the seasonal strength.
|
|
599
|
+
inline std::vector<double> seasonal_strength() const {
|
|
600
|
+
std::vector<double> res;
|
|
601
|
+
for (auto& s : seasonal) {
|
|
602
|
+
res.push_back(strength(s, remainder));
|
|
603
|
+
}
|
|
604
|
+
return res;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
/// Returns the trend strength.
|
|
608
|
+
inline double trend_strength() const {
|
|
609
|
+
return strength(trend, remainder);
|
|
610
|
+
}
|
|
611
|
+
};
|
|
612
|
+
|
|
613
|
+
/// A set of MSTL parameters.
|
|
614
|
+
class MstlParams {
|
|
615
|
+
size_t iterate_ = 2;
|
|
616
|
+
std::optional<float> lambda_ = std::nullopt;
|
|
617
|
+
std::optional<std::vector<size_t>> swin_ = std::nullopt;
|
|
618
|
+
StlParams stl_params_;
|
|
619
|
+
|
|
620
|
+
public:
|
|
621
|
+
/// Sets the number of iterations.
|
|
622
|
+
inline MstlParams iterations(size_t iterations) {
|
|
623
|
+
this->iterate_ = iterations;
|
|
624
|
+
return *this;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
/// Sets lambda for Box-Cox transformation.
|
|
628
|
+
inline MstlParams lambda(float lambda) {
|
|
629
|
+
this->lambda_ = lambda;
|
|
630
|
+
return *this;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
/// Sets the lengths of the seasonal smoothers.
|
|
634
|
+
inline MstlParams seasonal_lengths(const std::vector<size_t>& lengths) {
|
|
635
|
+
this->swin_ = lengths;
|
|
636
|
+
return *this;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
/// Sets the STL parameters.
|
|
640
|
+
inline MstlParams stl_params(const StlParams& stl_params) {
|
|
641
|
+
this->stl_params_ = stl_params;
|
|
642
|
+
return *this;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/// Decomposes a time series from an array.
|
|
646
|
+
template<typename T>
|
|
647
|
+
MstlResult<T> fit(const T* series, size_t series_size, const size_t* periods, size_t periods_size) const;
|
|
648
|
+
|
|
649
|
+
/// Decomposes a time series from a vector.
|
|
650
|
+
template<typename T>
|
|
651
|
+
MstlResult<T> fit(const std::vector<T>& series, const std::vector<size_t>& periods) const;
|
|
652
|
+
|
|
653
|
+
#if __cplusplus >= 202002L
|
|
654
|
+
/// Decomposes a time series from a span.
|
|
655
|
+
template<typename T>
|
|
656
|
+
MstlResult<T> fit(std::span<const T> series, std::span<const size_t> periods) const;
|
|
657
|
+
#endif
|
|
658
|
+
};
|
|
659
|
+
|
|
660
|
+
/// Creates a new set of MSTL parameters.
|
|
661
|
+
inline MstlParams mstl_params() {
|
|
662
|
+
return MstlParams();
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
namespace {
|
|
666
|
+
|
|
667
|
+
/// Applies the Box-Cox transformation to the `y_size` values starting at `y`.
///
/// For a non-zero `lambda` each value v becomes (v^lambda - 1) / lambda;
/// for a zero `lambda` each value becomes log(v). Returns the transformed
/// values in input order.
template<typename T>
std::vector<T> box_cox(const T* y, size_t y_size, float lambda) {
    std::vector<T> transformed;
    transformed.reserve(y_size);

    const T* const last = y + y_size;
    if (lambda != 0.0) {
        for (const T* it = y; it != last; ++it) {
            transformed.push_back(static_cast<T>(std::pow(*it, lambda) - 1.0) / lambda);
        }
    } else {
        for (const T* it = y; it != last; ++it) {
            transformed.push_back(std::log(*it));
        }
    }

    return transformed;
}
|
|
682
|
+
|
|
683
|
+
template<typename T>
|
|
684
|
+
std::tuple<std::vector<T>, std::vector<T>, std::vector<std::vector<T>>> mstl(
|
|
685
|
+
const T* x,
|
|
686
|
+
size_t k,
|
|
687
|
+
const size_t* seas_ids,
|
|
688
|
+
size_t seas_size,
|
|
689
|
+
size_t iterate,
|
|
690
|
+
std::optional<float> lambda,
|
|
691
|
+
const std::optional<std::vector<size_t>>& swin,
|
|
692
|
+
const StlParams& stl_params
|
|
693
|
+
) {
|
|
694
|
+
// keep track of indices instead of sorting seas_ids
|
|
695
|
+
// so order is preserved with seasonality
|
|
696
|
+
std::vector<size_t> indices;
|
|
697
|
+
for (size_t i = 0; i < seas_size; i++) {
|
|
698
|
+
indices.push_back(i);
|
|
699
|
+
}
|
|
700
|
+
std::sort(indices.begin(), indices.end(), [&seas_ids](size_t a, size_t b) {
|
|
701
|
+
return seas_ids[a] < seas_ids[b];
|
|
702
|
+
});
|
|
703
|
+
|
|
704
|
+
if (seas_size == 1) {
|
|
705
|
+
iterate = 1;
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
std::vector<std::vector<T>> seasonality;
|
|
709
|
+
seasonality.reserve(seas_size);
|
|
710
|
+
std::vector<T> trend;
|
|
711
|
+
|
|
712
|
+
auto deseas = lambda.has_value() ? box_cox(x, k, lambda.value()) : std::vector<T>(x, x + k);
|
|
713
|
+
|
|
714
|
+
if (seas_size != 0) {
|
|
715
|
+
for (size_t i = 0; i < seas_size; i++) {
|
|
716
|
+
seasonality.push_back(std::vector<T>());
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
for (size_t j = 0; j < iterate; j++) {
|
|
720
|
+
for (size_t i = 0; i < indices.size(); i++) {
|
|
721
|
+
auto idx = indices[i];
|
|
722
|
+
|
|
723
|
+
if (j > 0) {
|
|
724
|
+
for (size_t ii = 0; ii < deseas.size(); ii++) {
|
|
725
|
+
deseas[ii] += seasonality[idx][ii];
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
StlResult<T> fit;
|
|
730
|
+
if (swin) {
|
|
731
|
+
StlParams clone = stl_params;
|
|
732
|
+
fit = clone.seasonal_length((*swin)[idx]).fit(deseas, seas_ids[idx]);
|
|
733
|
+
} else if (stl_params.ns_.has_value()) {
|
|
734
|
+
fit = stl_params.fit(deseas, seas_ids[idx]);
|
|
735
|
+
} else {
|
|
736
|
+
StlParams clone = stl_params;
|
|
737
|
+
fit = clone.seasonal_length(7 + 4 * (i + 1)).fit(deseas, seas_ids[idx]);
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
seasonality[idx] = fit.seasonal;
|
|
741
|
+
trend = fit.trend;
|
|
742
|
+
|
|
743
|
+
for (size_t ii = 0; ii < deseas.size(); ii++) {
|
|
744
|
+
deseas[ii] -= seasonality[idx][ii];
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
} else {
|
|
749
|
+
// TODO use Friedman's Super Smoother for trend
|
|
750
|
+
throw std::invalid_argument("periods must not be empty");
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
std::vector<T> remainder;
|
|
754
|
+
remainder.reserve(k);
|
|
755
|
+
for (size_t i = 0; i < k; i++) {
|
|
756
|
+
remainder.push_back(deseas[i] - trend[i]);
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
return std::make_tuple(trend, remainder, seasonality);
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
template<typename T>
|
|
765
|
+
MstlResult<T> MstlParams::fit(const T* series, size_t series_size, const size_t* periods, size_t periods_size) const {
|
|
766
|
+
// return error to be consistent with stl
|
|
767
|
+
// and ensure seasonal is always same length as periods
|
|
768
|
+
for (size_t i = 0; i < periods_size; i++) {
|
|
769
|
+
if (periods[i] < 2) {
|
|
770
|
+
throw std::invalid_argument("periods must be at least 2");
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
// return error to be consistent with stl
|
|
775
|
+
// and ensure seasonal is always same length as periods
|
|
776
|
+
for (size_t i = 0; i < periods_size; i++) {
|
|
777
|
+
if (series_size < periods[i] * 2) {
|
|
778
|
+
throw std::invalid_argument("series has less than two periods");
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
if (lambda_.has_value()) {
|
|
783
|
+
auto lambda = lambda_.value();
|
|
784
|
+
if (lambda < 0 || lambda > 1) {
|
|
785
|
+
throw std::invalid_argument("lambda must be between 0 and 1");
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
if (swin_.has_value()) {
|
|
790
|
+
auto swin = swin_.value();
|
|
791
|
+
if (swin.size() != periods_size) {
|
|
792
|
+
throw std::invalid_argument("seasonal_lengths must have the same length as periods");
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
auto [trend, remainder, seasonal] = mstl(
|
|
797
|
+
series,
|
|
798
|
+
series_size,
|
|
799
|
+
periods,
|
|
800
|
+
periods_size,
|
|
801
|
+
iterate_,
|
|
802
|
+
lambda_,
|
|
803
|
+
swin_,
|
|
804
|
+
stl_params_
|
|
805
|
+
);
|
|
806
|
+
|
|
807
|
+
return MstlResult<T> {
|
|
808
|
+
seasonal,
|
|
809
|
+
trend,
|
|
810
|
+
remainder
|
|
811
|
+
};
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
template<typename T>
|
|
815
|
+
MstlResult<T> MstlParams::fit(const std::vector<T>& series, const std::vector<size_t>& periods) const {
|
|
816
|
+
return MstlParams::fit(series.data(), series.size(), periods.data(), periods.size());
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
#if __cplusplus >= 202002L
/// Decomposes a time series from a span.
///
/// Forwards the data pointers and sizes of `series` and `periods` to the
/// four-argument `fit` overload and returns its result.
template<typename T>
MstlResult<T> MstlParams::fit(std::span<const T> series, std::span<const size_t> periods) const {
    const T* values = series.data();
    const size_t* period_values = periods.data();
    return MstlParams::fit(values, series.size(), period_values, periods.size());
}
#endif
|
|
510
825
|
|
|
511
826
|
}
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: anomaly_detection
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rice
|
|
@@ -16,15 +15,14 @@ dependencies:
|
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 4.
|
|
18
|
+
version: '4.7'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 4.
|
|
27
|
-
description:
|
|
25
|
+
version: '4.7'
|
|
28
26
|
email: andrew@ankane.org
|
|
29
27
|
executables: []
|
|
30
28
|
extensions:
|
|
@@ -52,7 +50,6 @@ homepage: https://github.com/ankane/AnomalyDetection.rb
|
|
|
52
50
|
licenses:
|
|
53
51
|
- GPL-3.0-or-later
|
|
54
52
|
metadata: {}
|
|
55
|
-
post_install_message:
|
|
56
53
|
rdoc_options: []
|
|
57
54
|
require_paths:
|
|
58
55
|
- lib
|
|
@@ -67,8 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
67
64
|
- !ruby/object:Gem::Version
|
|
68
65
|
version: '0'
|
|
69
66
|
requirements: []
|
|
70
|
-
rubygems_version: 3.
|
|
71
|
-
signing_key:
|
|
67
|
+
rubygems_version: 3.6.9
|
|
72
68
|
specification_version: 4
|
|
73
69
|
summary: Time series anomaly detection for Ruby
|
|
74
70
|
test_files: []
|