breakout-detection 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +15 -1
- data/ext/breakout/edm_multi.cpp +1 -1
- data/ext/breakout/edm_percent.cpp +3 -3
- data/ext/breakout/edm_tail.cpp +1 -2
- data/ext/breakout/edmx.cpp +1 -1
- data/lib/breakout/version.rb +1 -1
- data/lib/breakout.rb +71 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6678a5f3c82af3a88809391a6c692f3d0af7d6d6acb335936a6a0712ed0a2176
|
4
|
+
data.tar.gz: d44d0f92be5412e5f49e095f9b3a1d0c564c30ac2e42911e276bc1092c10edd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 343f0d3245d495d022b2953009f724d0bf83f2448280b4e9ad0b4ca678241ac57044b9d6b3efbb2d473f2fc61d0083e601fbae669b243196aba080e440296bce
|
7
|
+
data.tar.gz: 129bc113eae6cda7e081ad8cc15f7b01f772bd7504187b96ad0e2a3af583e256d7127049e1528c5de4c9eeb87dad2e9db00101c35e14def788243f073226c36a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
:fire: [BreakoutDetection](https://github.com/twitter/BreakoutDetection) for Ruby
|
4
4
|
|
5
|
-
Learn
|
5
|
+
Learn [how it works](https://blog.twitter.com/engineering/en_us/a/2014/breakout-detection-in-the-wild)
|
6
6
|
|
7
7
|
[![Build Status](https://github.com/ankane/breakout/workflows/build/badge.svg?branch=master)](https://github.com/ankane/breakout/actions)
|
8
8
|
|
@@ -60,6 +60,20 @@ Breakout.detect(
|
|
60
60
|
)
|
61
61
|
```
|
62
62
|
|
63
|
+
## Plotting
|
64
|
+
|
65
|
+
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
gem 'vega'
|
69
|
+
```
|
70
|
+
|
71
|
+
And use:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
Breakout.plot(series, breakouts)
|
75
|
+
```
|
76
|
+
|
63
77
|
## Credits
|
64
78
|
|
65
79
|
This library uses the C++ code from the [BreakoutDetection](https://github.com/twitter/BreakoutDetection) R package and is available under the same license.
|
data/ext/breakout/edm_multi.cpp
CHANGED
@@ -50,7 +50,7 @@ std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, doub
|
|
50
50
|
for (int i = min_size - 1; i < s; ++i)
|
51
51
|
insert_element(right_min, right_max, Z[i]);
|
52
52
|
|
53
|
-
// Iterate over possible locations for the
|
53
|
+
// Iterate over possible locations for the penultimate change
|
54
54
|
for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
|
55
55
|
insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
|
56
56
|
remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
|
data/ext/breakout/edm_percent.cpp
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
|
-
Penalizes based on percent
|
3
|
-
Linear penalty means that each new breakout must result in an at least X%
|
4
|
-
Quadratic penalty means that each new
|
2
|
+
Penalizes based on percent change in the statistic value.
|
3
|
+
Linear penalty means that each new breakout must result in an at least X% increase
|
4
|
+
Quadratic penalty means that each new breakout must result in at least an (X*k)% increase for k breakouts
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <algorithm>
|
data/ext/breakout/edm_tail.cpp
CHANGED
@@ -355,7 +355,6 @@ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double
|
|
355
355
|
index /= 2;
|
356
356
|
}
|
357
357
|
}
|
358
|
-
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
359
358
|
// Move tau2 from the end of the time series to the front.
|
360
359
|
// Update the statistic value along the way
|
361
360
|
tau2 = N;
|
@@ -366,7 +365,7 @@ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double
|
|
366
365
|
--info.B[index];
|
367
366
|
index /= 2;
|
368
367
|
}
|
369
|
-
qb = std::pow(GetQuantile(info.B, quant), alpha);
|
368
|
+
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
370
369
|
|
371
370
|
double stat = 2 * qc - qa - qb;
|
372
371
|
stat *= (double)(tau2 - tau1) * tau1 / tau2;
|
data/ext/breakout/edmx.cpp
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
Robust estimation of 2[mean(X)-mean(Y)]^2 time normalization factor
|
3
3
|
This is the E-Divisive E-statistic when alpha = 2
|
4
|
-
Instead of calculating mean(X) we calculate median(X), and similarly for Y
|
4
|
+
Instead of calculating mean(X), we calculate median(X), and similarly for Y
|
5
5
|
*/
|
6
6
|
|
7
7
|
#include <algorithm>
|
data/lib/breakout/version.rb
CHANGED
data/lib/breakout.rb
CHANGED
@@ -5,24 +5,78 @@ require "breakout/ext"
|
|
5
5
|
require "breakout/version"
|
6
6
|
|
7
7
|
module Breakout
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
class << self
|
9
|
+
def detect(series, min_size: 30, method: "multi", alpha: 2, beta: nil, degree: 1, percent: nil, exact: true)
|
10
|
+
raise ArgumentError, "min_size must be at least 2" if min_size < 2
|
11
|
+
raise ArgumentError, "beta and percent cannot be passed together" unless beta.nil? || percent.nil?
|
12
|
+
raise ArgumentError, "alpha must be between 0 and 2" if alpha < 0 || alpha > 2
|
13
|
+
raise ArgumentError, "degree must be 0, 1, or 2" unless [0, 1, 2].include?(degree)
|
14
|
+
raise ArgumentError, "method must be amoc or multi" unless ["amoc", "multi"].include?(method)
|
15
|
+
|
16
|
+
return [] if series.size < min_size
|
17
|
+
|
18
|
+
if series.is_a?(Hash)
|
19
|
+
sorted = series.sort_by { |k, _| k }
|
20
|
+
z = sorted.map(&:last)
|
21
|
+
else
|
22
|
+
z = series
|
23
|
+
end
|
24
|
+
|
25
|
+
res = _detect(z, min_size, method, alpha, beta, degree, percent, exact)
|
26
|
+
res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
|
27
|
+
res
|
28
|
+
end
|
29
|
+
|
30
|
+
def plot(series, breakouts)
|
31
|
+
require "vega"
|
32
|
+
|
33
|
+
data =
|
34
|
+
if series.is_a?(Hash)
|
35
|
+
series.map { |k, v| {x: iso8601(k), y: v, breakout: breakouts.include?(k)} }
|
36
|
+
else
|
37
|
+
series.map.with_index { |v, i| {x: i, y: v, breakout: breakouts.include?(i)} }
|
38
|
+
end
|
39
|
+
|
40
|
+
if series.is_a?(Hash)
|
41
|
+
x = {field: "x", type: "temporal"}
|
42
|
+
x["scale"] = {type: "utc"} if series.keys.first.is_a?(Date)
|
43
|
+
else
|
44
|
+
x = {field: "x", type: "quantitative"}
|
45
|
+
end
|
46
|
+
|
47
|
+
Vega.lite
|
48
|
+
.data(data)
|
49
|
+
.layer([
|
50
|
+
{
|
51
|
+
mark: {type: "line"},
|
52
|
+
encoding: {
|
53
|
+
x: x,
|
54
|
+
y: {field: "y", type: "quantitative", scale: {zero: false}},
|
55
|
+
color: {value: "#fa9088"}
|
56
|
+
}
|
57
|
+
},
|
58
|
+
{
|
59
|
+
transform: [{"filter": "datum.breakout == true"}],
|
60
|
+
mark: {type: "rule"},
|
61
|
+
encoding: {
|
62
|
+
x: x,
|
63
|
+
color: {value: "#19c7ca"},
|
64
|
+
strokeWidth: {value: 2},
|
65
|
+
strokeDash: {value: [6, 6]}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
])
|
69
|
+
.config(axis: {title: nil, labelFontSize: 12})
|
22
70
|
end
|
23
71
|
|
24
|
-
|
25
|
-
|
26
|
-
|
72
|
+
private
|
73
|
+
|
74
|
+
def iso8601(v)
|
75
|
+
if v.is_a?(Date)
|
76
|
+
v.strftime("%Y-%m-%d")
|
77
|
+
else
|
78
|
+
v.strftime("%Y-%m-%dT%H:%M:%S.%L%z")
|
79
|
+
end
|
80
|
+
end
|
27
81
|
end
|
28
82
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout-detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|