breakout-detection 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +15 -1
- data/ext/breakout/edm_multi.cpp +1 -1
- data/ext/breakout/edm_percent.cpp +3 -3
- data/ext/breakout/edm_tail.cpp +1 -2
- data/ext/breakout/edmx.cpp +1 -1
- data/lib/breakout/version.rb +1 -1
- data/lib/breakout.rb +71 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6678a5f3c82af3a88809391a6c692f3d0af7d6d6acb335936a6a0712ed0a2176
|
4
|
+
data.tar.gz: d44d0f92be5412e5f49e095f9b3a1d0c564c30ac2e42911e276bc1092c10edd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 343f0d3245d495d022b2953009f724d0bf83f2448280b4e9ad0b4ca678241ac57044b9d6b3efbb2d473f2fc61d0083e601fbae669b243196aba080e440296bce
|
7
|
+
data.tar.gz: 129bc113eae6cda7e081ad8cc15f7b01f772bd7504187b96ad0e2a3af583e256d7127049e1528c5de4c9eeb87dad2e9db00101c35e14def788243f073226c36a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
:fire: [BreakoutDetection](https://github.com/twitter/BreakoutDetection) for Ruby
|
4
4
|
|
5
|
-
Learn
|
5
|
+
Learn [how it works](https://blog.twitter.com/engineering/en_us/a/2014/breakout-detection-in-the-wild)
|
6
6
|
|
7
7
|
[Build Status](https://github.com/ankane/breakout/actions)
|
8
8
|
|
@@ -60,6 +60,20 @@ Breakout.detect(
|
|
60
60
|
)
|
61
61
|
```
|
62
62
|
|
63
|
+
## Plotting
|
64
|
+
|
65
|
+
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
gem 'vega'
|
69
|
+
```
|
70
|
+
|
71
|
+
And use:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
Breakout.plot(series, breakouts)
|
75
|
+
```
|
76
|
+
|
63
77
|
## Credits
|
64
78
|
|
65
79
|
This library uses the C++ code from the [BreakoutDetection](https://github.com/twitter/BreakoutDetection) R package and is available under the same license.
|
data/ext/breakout/edm_multi.cpp
CHANGED
@@ -50,7 +50,7 @@ std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, doub
|
|
50
50
|
for (int i = min_size - 1; i < s; ++i)
|
51
51
|
insert_element(right_min, right_max, Z[i]);
|
52
52
|
|
53
|
-
// Iterate over possible locations for the
|
53
|
+
// Iterate over possible locations for the penultimate change
|
54
54
|
for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
|
55
55
|
insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
|
56
56
|
remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
|
data/ext/breakout/edm_percent.cpp
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
|
-
Penalizes based on percent
|
3
|
-
Linear penalty means that each new breakout must result in an at least X%
|
4
|
-
Quadratic penalty means that each new
|
2
|
+
Penalizes based on percent change in the statistic value.
|
3
|
+
Linear penalty means that each new breakout must result in an at least X% increase
|
4
|
+
Quadratic penalty means that each new breakout must result in at least an (X*k)% increase for k breakouts
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <algorithm>
|
data/ext/breakout/edm_tail.cpp
CHANGED
@@ -355,7 +355,6 @@ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double
|
|
355
355
|
index /= 2;
|
356
356
|
}
|
357
357
|
}
|
358
|
-
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
359
358
|
// Move tau2 from the end of the time series to the front.
|
360
359
|
// Update the statistic value along the way
|
361
360
|
tau2 = N;
|
@@ -366,7 +365,7 @@ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double
|
|
366
365
|
--info.B[index];
|
367
366
|
index /= 2;
|
368
367
|
}
|
369
|
-
qb = std::pow(GetQuantile(info.B, quant), alpha);
|
368
|
+
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
370
369
|
|
371
370
|
double stat = 2 * qc - qa - qb;
|
372
371
|
stat *= (double)(tau2 - tau1) * tau1 / tau2;
|
data/ext/breakout/edmx.cpp
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
Robust estimation of 2[mean(X)-mean(Y)]^2 time normalization factor
|
3
3
|
This is the E-Divisive E-statistic when alpha = 2
|
4
|
-
Instead of calculating mean(X) we calculate median(X), and similarly for Y
|
4
|
+
Instead of calculating mean(X), we calculate median(X), and similarly for Y
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <algorithm>
|
data/lib/breakout/version.rb
CHANGED
data/lib/breakout.rb
CHANGED
@@ -5,24 +5,78 @@ require "breakout/ext"
|
|
5
5
|
require "breakout/version"
|
6
6
|
|
7
7
|
module Breakout
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
class << self
|
9
|
+
def detect(series, min_size: 30, method: "multi", alpha: 2, beta: nil, degree: 1, percent: nil, exact: true)
|
10
|
+
raise ArgumentError, "min_size must be at least 2" if min_size < 2
|
11
|
+
raise ArgumentError, "beta and percent cannot be passed together" unless beta.nil? || percent.nil?
|
12
|
+
raise ArgumentError, "alpha must be between 0 and 2" if alpha < 0 || alpha > 2
|
13
|
+
raise ArgumentError, "degree must be 0, 1, or 2" unless [0, 1, 2].include?(degree)
|
14
|
+
raise ArgumentError, "method must be amoc or multi" unless ["amoc", "multi"].include?(method)
|
15
|
+
|
16
|
+
return [] if series.size < min_size
|
17
|
+
|
18
|
+
if series.is_a?(Hash)
|
19
|
+
sorted = series.sort_by { |k, _| k }
|
20
|
+
z = sorted.map(&:last)
|
21
|
+
else
|
22
|
+
z = series
|
23
|
+
end
|
24
|
+
|
25
|
+
res = _detect(z, min_size, method, alpha, beta, degree, percent, exact)
|
26
|
+
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
27
|
+
res
|
28
|
+
end
|
29
|
+
|
30
|
+
def plot(series, breakouts)
|
31
|
+
require "vega"
|
32
|
+
|
33
|
+
data =
|
34
|
+
if series.is_a?(Hash)
|
35
|
+
series.map { |k, v| {x: iso8601(k), y: v, breakout: breakouts.include?(k)} }
|
36
|
+
else
|
37
|
+
series.map.with_index { |v, i| {x: i, y: v, breakout: breakouts.include?(i)} }
|
38
|
+
end
|
39
|
+
|
40
|
+
if series.is_a?(Hash)
|
41
|
+
x = {field: "x", type: "temporal"}
|
42
|
+
x["scale"] = {type: "utc"} if series.keys.first.is_a?(Date)
|
43
|
+
else
|
44
|
+
x = {field: "x", type: "quantitative"}
|
45
|
+
end
|
46
|
+
|
47
|
+
Vega.lite
|
48
|
+
.data(data)
|
49
|
+
.layer([
|
50
|
+
{
|
51
|
+
mark: {type: "line"},
|
52
|
+
encoding: {
|
53
|
+
x: x,
|
54
|
+
y: {field: "y", type: "quantitative", scale: {zero: false}},
|
55
|
+
color: {value: "#fa9088"}
|
56
|
+
}
|
57
|
+
},
|
58
|
+
{
|
59
|
+
transform: [{"filter": "datum.breakout == true"}],
|
60
|
+
mark: {type: "rule"},
|
61
|
+
encoding: {
|
62
|
+
x: x,
|
63
|
+
color: {value: "#19c7ca"},
|
64
|
+
strokeWidth: {value: 2},
|
65
|
+
strokeDash: {value: [6, 6]}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
])
|
69
|
+
.config(axis: {title: nil, labelFontSize: 12})
|
22
70
|
end
|
23
71
|
|
24
|
-
|
25
|
-
|
26
|
-
|
72
|
+
private
|
73
|
+
|
74
|
+
def iso8601(v)
|
75
|
+
if v.is_a?(Date)
|
76
|
+
v.strftime("%Y-%m-%d")
|
77
|
+
else
|
78
|
+
v.strftime("%Y-%m-%dT%H:%M:%S.%L%z")
|
79
|
+
end
|
80
|
+
end
|
27
81
|
end
|
28
82
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout-detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|