breakout-detection 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8f81b5a2a9781787903f293322eef1ff6d08479cde2df25eea47c3023cd501bd
4
+ data.tar.gz: 1839625455de87fca184eb2f1fdf8bc7a2e8809c8afd17b9fc9ec61d6d775b7a
5
+ SHA512:
6
+ metadata.gz: 1fa14ae2cc9db547bdeb13a3232d87c96e239aa6dde618ea6b7fa9c53aaca5596b45223c3e7eeceb9a8aa0428de77c54c4e8275adf57b9767c13a889538981bb
7
+ data.tar.gz: 4ba5ee1decd2ab9084c3d377ec6d37a743c7f5ff47bb48eb55bdd5ecb9bce57df534522643c448b00a35cdd39d1a429ebc1c036440b079046c321bbef5e193a1
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 (2021-09-02)
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,340 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ {description}
294
+ Copyright (C) {year} {fullname}
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ {signature of Ty Coon}, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
340
+
data/README.md ADDED
@@ -0,0 +1,92 @@
1
+ # Breakout
2
+
3
+ :fire: [BreakoutDetection](https://github.com/twitter/BreakoutDetection) for Ruby
4
+
5
+ Learn more about [how it works](https://blog.twitter.com/engineering/en_us/a/2014/breakout-detection-in-the-wild)
6
+
7
+ [![Build Status](https://github.com/ankane/breakout/workflows/build/badge.svg?branch=master)](https://github.com/ankane/breakout/actions)
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application’s Gemfile:
12
+
13
+ ```ruby
14
+ gem 'breakout-detection'
15
+ ```
16
+
17
+ ## Getting Started
18
+
19
+ Detect breakouts in a time series
20
+
21
+ ```ruby
22
+ series = {
23
+ Date.parse("2020-01-01") => 100,
24
+ Date.parse("2020-01-02") => 150,
25
+ Date.parse("2020-01-03") => 136,
26
+ # ...
27
+ }
28
+
29
+ Breakout.detect(series)
30
+ ```
31
+
32
+ Works great with [Groupdate](https://github.com/ankane/groupdate)
33
+
34
+ ```ruby
35
+ series = User.group_by_day(:created_at).count
36
+ Breakout.detect(series)
37
+ ```
38
+
39
+ Series can also be an array without times (the index is returned)
40
+
41
+ ```ruby
42
+ series = [100, 150, 136, ...]
43
+ Breakout.detect(series)
44
+ ```
45
+
46
+ ## Options
47
+
48
+ Pass options - default values below
49
+
50
+ ```ruby
51
+ Breakout.detect(
52
+ series,
53
+ min_size: 30, # minimum observations between breakouts
54
+ method: "multi", # multi or amoc (at most one change)
55
+ degree: 1, # degree of the penalization polynomial (multi only)
56
+ beta: 0.008, # penalization term (multi only)
57
+ percent: nil, # minimum percent change in goodness of fit statistic (multi only)
58
+ alpha: 2, # weight of the distance between observations (amoc only)
59
+ exact: true # exact or approximate median (amoc only)
60
+ )
61
+ ```
62
+
63
+ ## Credits
64
+
65
+ This library uses the C++ code from the [BreakoutDetection](https://github.com/twitter/BreakoutDetection) R package and is available under the same license.
66
+
67
+ ## References
68
+
69
+ - [Leveraging Cloud Data to Mitigate User Experience from ‘Breaking Bad’](https://arxiv.org/abs/1411.7955)
70
+
71
+ ## History
72
+
73
+ View the [changelog](https://github.com/ankane/breakout/blob/master/CHANGELOG.md)
74
+
75
+ ## Contributing
76
+
77
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
78
+
79
+ - [Report bugs](https://github.com/ankane/breakout/issues)
80
+ - Fix bugs and [submit pull requests](https://github.com/ankane/breakout/pulls)
81
+ - Write, clarify, or fix documentation
82
+ - Suggest or add new features
83
+
84
+ To get started with development:
85
+
86
+ ```sh
87
+ git clone https://github.com/ankane/breakout.git
88
+ cd breakout
89
+ bundle install
90
+ bundle exec rake compile
91
+ bundle exec rake test
92
+ ```
@@ -0,0 +1,93 @@
1
+ #include <algorithm>
2
+ #include <cmath>
3
+ #include <set>
4
+ #include <vector>
5
+ #include "helper.h"
6
+
7
+ // Z: time series
8
+ // min_size: minimum segment size
9
+ // beta: penalization term for the addition of a change point
10
+
11
+ std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, double beta = 0, int degree = 0) {
12
+
13
+ // identify which type of penalization to use
14
+ double (*G)(double);
15
+ switch (degree) {
16
+ case 1:
17
+ G = Linear;
18
+ break;
19
+ case 2:
20
+ G = Quadratic;
21
+ break;
22
+ default:
23
+ G = Const;
24
+ break;
25
+ }
26
+
27
+ int n = Z.size();
28
+ if (beta < 0) // assume that beta is a positive number
29
+ beta = -beta;
30
+ std::vector<int> prev(n + 1, 0); // store optimal location of previous change point
31
+ std::vector<int> number(n + 1, 0); // store the number of change points in optimal segmentation
32
+ std::vector<double> F(n + 1, -3); // store optimal statistic value
33
+ // F[s] is calculated using observations { Z[0], Z[1], ..., Z[s-1] }
34
+
35
+ // trees used to store the "upper half" of the considered observations
36
+ std::multiset<double> right_min, left_min;
37
+ // trees used to store the "lower half" of the considered observations
38
+ std::multiset<double, std::greater<double>> right_max, left_max;
39
+
40
+ // Iterate over possible locations for the last change
41
+ for (int s = 2 * min_size; s < n + 1; ++s) {
42
+ right_max.clear();
43
+ right_min.clear(); // clear right trees
44
+ left_max.clear();
45
+ left_min.clear(); // clear left trees
46
+
47
+ // initialize left and right trees to account for minimum segment size
48
+ for (int i = prev[min_size - 1]; i < min_size - 1; ++i)
49
+ insert_element(left_min, left_max, Z[i]);
50
+ for (int i = min_size - 1; i < s; ++i)
51
+ insert_element(right_min, right_max, Z[i]);
52
+
53
+ // Iterate over possible locations for the penultiamte change
54
+ for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
55
+ insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
56
+ remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
57
+ // left tree now has { Z[prev[t-1]], ..., Z[t-1] }
58
+ // right tree now has { Z[t], ..., Z[s-1] }
59
+
60
+ // check to see if optimal position of previous change point has changed
61
+ // if so update the left tree
62
+ if (prev[t] > prev[t - 1]) {
63
+ for (int i = prev[t - 1]; i < prev[t]; ++i)
64
+ remove_element(left_min, left_max, Z[i]);
65
+ } else if (prev[t] < prev[t - 1]) {
66
+ for (int i = prev[t]; i < prev[t - 1]; ++i)
67
+ insert_element(left_min, left_max, Z[i]);
68
+ }
69
+
70
+ // calculate statistic value
71
+ double left_median = get_median(left_min, left_max), right_median = get_median(right_min, right_max);
72
+ double normalize = ((t - prev[t]) * (s - t)) / (std::pow((double)(s - prev[t]), 2.0));
73
+ double tmp = F[t] + normalize * std::pow(left_median - right_median, 2.0) - beta * G(number[t]);
74
+ // check for improved optimal statistic value
75
+ if (tmp > F[s]) {
76
+ number[s] = number[t] + 1;
77
+ F[s] = tmp;
78
+ prev[s] = t;
79
+ }
80
+ }
81
+ }
82
+
83
+ // obtain list of optimal change point estimates
84
+ std::vector<int> ret;
85
+ int at = n;
86
+ while (at) {
87
+ if (prev[at]) // don't insert 0 as a change point estimate
88
+ ret.push_back(prev[at]);
89
+ at = prev[at];
90
+ }
91
+ sort(ret.begin(), ret.end());
92
+ return ret;
93
+ }
@@ -0,0 +1,105 @@
1
+ /*
2
+ Penalizes based on percent change in the statistic value.
3
+ Linear penalty means that each new breakout must result in at least an X% increase
4
+ Quadratic penalty means that each new breakout must result in at least an (X*k)% increase for k breakouts
5
+ */
6
+
7
+ #include <algorithm>
8
+ #include <cmath>
9
+ #include <set>
10
+ #include <vector>
11
+ #include "helper.h"
12
+
13
+ std::vector<int> EDM_percent(const std::vector<double>& Z, int min_size = 24, double percent = 0, int degree = 0) {
14
+ // Z: time series
15
+ // min_size: minimum segment size
16
+ // beta: penalization term for the addition of a change point
17
+
18
+ // identify which type of penalization to use
19
+ double (*G)(double);
20
+ switch (degree) {
21
+ case 1:
22
+ G = Linear;
23
+ break;
24
+ case 2:
25
+ G = Quadratic;
26
+ break;
27
+ default:
28
+ G = Const;
29
+ break;
30
+ }
31
+
32
+ int n = Z.size();
33
+
34
+ std::vector<int> prev(n + 1, 0); // store optimal location of previous change point
35
+ std::vector<int> number(n + 1, 0); // store the number of change points in optimal segmentation
36
+ std::vector<double> F(n + 1, 0); // store optimal statistic value
37
+ // F[s] is calculated using observations { Z[0], Z[1], ..., Z[s-1] }
38
+
39
+ // trees used to store the "upper half" of the considered observations
40
+ std::multiset<double> right_min, left_min;
41
+ // trees used to store the "lower half" of the considered observations
42
+ std::multiset<double, std::greater<double>> right_max, left_max;
43
+
44
+ // Iterate over possible locations for the last change
45
+ for (int s = 2 * min_size; s < n + 1; ++s) {
46
+ right_max.clear();
47
+ right_min.clear(); // clear right trees
48
+ left_max.clear();
49
+ left_min.clear(); // clear left trees
50
+
51
+ // initialize left and right trees to account for minimum segment size
52
+ for (int i = prev[min_size - 1]; i < min_size - 1; ++i)
53
+ insert_element(left_min, left_max, Z[i]);
54
+ for (int i = min_size - 1; i < s; ++i)
55
+ insert_element(right_min, right_max, Z[i]);
56
+
57
+ // Iterate over possible locations for the penultimate change
58
+ for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
59
+ insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
60
+ remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
61
+ // left tree now has { Z[prev[t-1]], ..., Z[t-1] }
62
+ // right tree now has { Z[t], ..., Z[s-1] }
63
+
64
+ // check to see if optimal position of previous change point has changed
65
+ // if so update the left tree
66
+ if (prev[t] > prev[t - 1]) {
67
+ for (int i = prev[t - 1]; i < prev[t]; ++i)
68
+ remove_element(left_min, left_max, Z[i]);
69
+ } else if (prev[t] < prev[t - 1]) {
70
+ for (int i = prev[t]; i < prev[t - 1]; ++i)
71
+ insert_element(left_min, left_max, Z[i]);
72
+ }
73
+
74
+ // calculate statistic value
75
+ double left_median = get_median(left_min, left_max), right_median = get_median(right_min, right_max);
76
+ double normalize = ((t - prev[t]) * (s - t)) / (std::pow(static_cast<double>(s - prev[t]), 2));
77
+ double tmp = F[t] + normalize * std::pow(static_cast<double>(left_median - right_median), 2);
78
+ // Find best location for change point. check % condition later
79
+ if (tmp > F[s]) {
80
+ number[s] = number[t] + 1;
81
+ F[s] = tmp;
82
+ prev[s] = t;
83
+ }
84
+ }
85
+ // check to make sure we meet the percent change requirement
86
+ if (prev[s]) {
87
+ if (F[s] - F[prev[s]] < percent * G(number[prev[s]]) * F[prev[s]]) {
88
+ number[s] = number[prev[s]];
89
+ F[s] = F[prev[s]];
90
+ prev[s] = prev[prev[s]];
91
+ }
92
+ }
93
+ }
94
+
95
+ // obtain list of optimal change point estimates
96
+ std::vector<int> ret;
97
+ int at = n;
98
+ while (at) {
99
+ if (prev[at]) // don't insert 0 as a change point estimate
100
+ ret.push_back(prev[at]);
101
+ at = prev[at];
102
+ }
103
+ sort(ret.begin(), ret.end());
104
+ return ret;
105
+ }
@@ -0,0 +1,380 @@
1
+ /*
2
+ This version calculates the between distance using the delta points around the change point estimate.
3
+ */
4
+
5
+ #include <algorithm>
6
+ #include <cmath>
7
+ #include <iostream>
8
+ #include <vector>
9
+
10
+ // Class used to hold all the information about the
11
+ // breakout location and the interval trees
12
// Holds the three interval trees together with the best breakout found so far
// and the parameters of the search.
struct Information {
  std::vector<double> A, B, AB; // interval trees stored as flat arrays (position 0 is not used)
  double best_stat;             // largest statistic value recorded so far
  int best_loc, best_t2;        // tau1 and tau2 at which best_stat was recorded
  int min_size, b;              // minimum segment size and tree depth

  Information(int, int);
};

// Allocate every tree with 2^(bb + 1) zero-initialised slots and seed the
// best-so-far fields with the sentinel value -3.
Information::Information(int bb, int m)
    : A(1 << (bb + 1)),
      B(1 << (bb + 1)),
      AB(1 << (bb + 1)),
      best_stat(-3),
      best_loc(-3),
      best_t2(-3),
      min_size(m),
      b(bb) {}
29
+
30
+ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha);
31
+ void ForwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha);
32
+
33
// Get the index of the leaf node whose interval contains |x|.
//
// The tree has 2^B leaves stored at indices [2^B, 2^(B + 1) - 1]; the k-th
// leaf (k = 1 .. 2^B) covers the interval ((k - 1) / 2^B, k / 2^B].
//
// B  depth of the tree
// x  pairwise difference; only its magnitude is used. Callers are expected to
//    pass values with |x| <= 1 -- NOTE(review): confirm against the caller's
//    normalisation step.
//
// Returns an index that is always a valid leaf position.
int GetIndex(int B, double x) {
  const int leaves = 1 << B;
  double slot = std::ceil(std::abs(x) * leaves);

  // A difference of exactly 0 gives slot 0, which used to resolve to index
  // 2^B - 1: an internal node on the right-hand edge of the tree, so identical
  // observations were counted among the LARGEST differences. Put them in the
  // first leaf instead. The negated comparison also catches NaN.
  if (!(slot >= 1))
    slot = 1;
  // Magnitudes above 1 would index past the end of the tree.
  if (slot > leaves)
    slot = leaves;

  return (int)slot + leaves - 1;
}
37
+
38
// Return an approximate quantile based on the interval tree.
//
// x      tree of counts; x[1] is the root and the children of node i are
//        2 * i and 2 * i + 1
// quant  requested quantile
//
// The search walks down from the root while narrowing the value interval
// [lo, hi] (initially [0, 1]) and returns a point inside the final interval.
double GetQuantile(std::vector<double>& x, double quant) {
  const int size = x.size();
  int remaining = std::ceil(x[1] * quant); // rank still to be located below the current node
  double lo = 0, hi = 1;
  int node = 1;

  while (node < size) { // Make sure that we do not go beyond the array bounds
    const int left = node * 2;
    if (left >= size)
      break;

    const double mid = (lo + hi) / 2;

    if (x[node] == remaining) {
      // Exactly `remaining` elements in this subtree, so stop early and
      // return a weighted combination of the two child estimates.
      const double leftWeight = x[left] / (x[left] + x[left + 1]);
      const double rightWeight = 1 - leftWeight;
      const double rightLow = (hi + mid) / 2;
      return leftWeight * (quant * (mid - lo) + lo) + rightWeight * (quant * (hi - rightLow) + rightLow);
    }

    if (x[left] >= remaining) {
      // Enough elements in the left child's subtree: descend left
      node = left;
      hi = mid;
    } else if (x[left] < remaining) {
      // Not enough elements on the left: skip them and descend right
      remaining -= x[left];
      node = left + 1;
      lo = mid;
    }
  }

  return quant * (hi - lo) + lo;
}
68
+
69
+ std::vector<int> AddToTree(int B, std::vector<double>& x) {
70
+ std::vector<int> A(1 << (B + 1));
71
+ std::vector<double>::iterator i;
72
+ for (i = x.begin(); i < x.end(); ++i) { // Iterage over items we wish to add to the tree
73
+ int index = GetIndex(B, *i);
74
+ while (index) {
75
+ ++A[index];
76
+ index /= 2;
77
+ }
78
+ }
79
+ return A;
80
+ }
81
+
82
+ std::vector<int> EDM_tail(std::vector<double>& Z, int min_size = 24, double alpha = 2, double quant = 0.5) {
83
+
84
+ int N = Z.size();
85
+ int eps = (int)std::ceil(std::log(N));
86
+ eps = std::max(eps, 10);
87
+
88
+ Information info(eps, min_size);
89
+
90
+ int tau1 = info.min_size;
91
+ int tau2 = tau1 * 2;
92
+
93
+ // Populate trees and calculate statistic value for starting configuration of
94
+ // 2 min_size segments
95
+ for (int i = 0; i < tau1; ++i) {
96
+ for (int j = i + 1; j < tau1; ++j) {
97
+ int index = GetIndex(info.b, Z[i] - Z[j]);
98
+ while (index) {
99
+ ++info.A[index];
100
+ index /= 2;
101
+ }
102
+ }
103
+ }
104
+
105
+ // Populate trees and calculate statistic value for starting configuration of
106
+ // 2 min_size segments
107
+ for (int i = tau1; i < tau2; ++i) {
108
+ for (int j = i + 1; j < tau2; ++j) {
109
+ int index = GetIndex(info.b, Z[i] - Z[j]);
110
+ while (index) {
111
+ ++info.B[index];
112
+ index /= 2;
113
+ }
114
+ }
115
+ }
116
+
117
+ // Populate trees and calculate statistic value for starting configuration of
118
+ // 2 min_size segments
119
+ for (int i = 0; i < tau1; ++i) {
120
+ for (int j = tau1; j < tau2; ++j) {
121
+ int index = GetIndex(info.b, Z[i] - Z[j]);
122
+ while (index) {
123
+ ++info.AB[index];
124
+ index /= 2;
125
+ }
126
+ }
127
+ }
128
+
129
+ double qa, qb, qc, stat;
130
+
131
+ qa = std::pow(GetQuantile(info.A, quant), alpha);
132
+ qb = std::pow(GetQuantile(info.B, quant), alpha);
133
+ qc = std::pow(GetQuantile(info.AB, quant), alpha);
134
+
135
+ stat = 2 * qc - qa - qb;
136
+ stat *= (double)(tau1) * (tau2 - tau1) / (tau2);
137
+
138
+ info.best_stat = stat;
139
+ info.best_loc = tau1;
140
+ info.best_t2 = tau2;
141
+
142
+ // Increment tau2 and update trees and statistic
143
+ ++tau2;
144
+ for (; tau2 < N + 1; ++tau2) {
145
+ int index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
146
+ while (index) { // array position 0 is not used, so we exit once we reach this location
147
+ ++info.B[index];
148
+ index /= 2;
149
+ }
150
+ qb = std::pow(GetQuantile(info.B, quant), alpha);
151
+ stat = 2 * qc - qa - qb;
152
+ stat *= (double)(tau2 - tau1) * tau1 / tau2;
153
+
154
+ if (stat > info.best_stat) {
155
+ info.best_stat = stat;
156
+ info.best_loc = tau1;
157
+ info.best_t2 = tau2;
158
+ }
159
+ }
160
+
161
+ bool forward_move = false;
162
+ // Initial consideration of other possible locations for tau1
163
+ while (tau1 < N - min_size) {
164
+ //"warm start" to update tree and statistic value for other prefix series
165
+ if (forward_move) {
166
+ ForwardUpdate(Z, info, tau1, quant, alpha);
167
+ } else {
168
+ BackwardUpdate(Z, info, tau1, quant, alpha);
169
+ }
170
+ forward_move = !forward_move;
171
+ }
172
+
173
+ std::vector<int> ret;
174
+ if (info.best_stat > 0) {
175
+ ret.push_back(info.best_loc);
176
+ }
177
+ return ret;
178
+ }
179
+
180
+ void ForwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha) {
181
+
182
+ int min_size = info.min_size;
183
+ int tau2 = tau1 + min_size;
184
+ ++tau1;
185
+ int N = Z.size(), index;
186
+ // Update A tree
187
+ for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
188
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
189
+ while (index) {
190
+ ++info.A[index];
191
+ index /= 2;
192
+ }
193
+ }
194
+ for (int i = tau1 - min_size; i < tau1; ++i) {
195
+ index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
196
+ while (index) {
197
+ --info.A[index];
198
+ index /= 2;
199
+ }
200
+ }
201
+ index = GetIndex(info.b, Z[tau1 - min_size - 1] - Z[tau1 - min_size]);
202
+ while (index) {
203
+ ++info.A[index];
204
+ index /= 2;
205
+ }
206
+ double qa = std::pow(GetQuantile(info.A, quant), alpha);
207
+
208
+ // Update AB tree
209
+ index = GetIndex(info.b, Z[tau1 - 1] - Z[tau1 - min_size - 1]);
210
+ while (index) {
211
+ --info.AB[index];
212
+ index /= 2;
213
+ }
214
+ for (int i = tau1; i < tau2; ++i) {
215
+ index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
216
+ while (index) {
217
+ --info.AB[index];
218
+ index /= 2;
219
+ }
220
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
221
+ while (index) {
222
+ ++info.AB[index];
223
+ index /= 2;
224
+ }
225
+ }
226
+ for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
227
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
228
+ while (index) {
229
+ --info.AB[index];
230
+ index /= 2;
231
+ }
232
+ index = GetIndex(info.b, Z[i] - Z[tau2]);
233
+ while (index) {
234
+ ++info.AB[index];
235
+ index /= 2;
236
+ }
237
+ }
238
+ index = GetIndex(info.b, Z[tau1 - 1] - Z[tau2]);
239
+ while (index) {
240
+ ++info.AB[index];
241
+ index /= 2;
242
+ }
243
+ double qc = std::pow(GetQuantile(info.AB, quant), alpha);
244
+
245
+ // Update B tree
246
+ for (int i = tau1; i < tau2; ++i) {
247
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
248
+ while (index) {
249
+ --info.B[index];
250
+ index /= 2;
251
+ }
252
+ index = GetIndex(info.b, Z[i] - Z[tau2]);
253
+ while (index) {
254
+ ++info.B[index];
255
+ index /= 2;
256
+ }
257
+ }
258
+
259
+ // Increment tau2 and update statistic value as we proceed
260
+ ++tau2;
261
+ for (; tau2 < N + 1; ++tau2) {
262
+ index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
263
+ while (index) {
264
+ ++info.B[index];
265
+ index /= 2;
266
+ }
267
+ double qb = std::pow(GetQuantile(info.B, quant), alpha);
268
+
269
+ double stat = 2 * qc - qa - qb;
270
+ stat *= (double)(tau2 - tau1) * tau1 / tau2;
271
+
272
+ if (stat > info.best_stat) {
273
+ info.best_stat = stat;
274
+ info.best_loc = tau1;
275
+ info.best_t2 = tau2;
276
+ }
277
+ }
278
+ }
279
+
280
+ void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha) {
281
+
282
+ int min_size = info.min_size;
283
+ int tau2 = tau1 + min_size;
284
+ ++tau1;
285
+ int N = Z.size(), index;
286
+ // Update A tree
287
+ for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
288
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
289
+ while (index) {
290
+ ++info.A[index];
291
+ index /= 2;
292
+ }
293
+ }
294
+ for (int i = tau1 - min_size; i < tau1; ++i) {
295
+ index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
296
+ while (index) {
297
+ --info.A[index];
298
+ index /= 2;
299
+ }
300
+ }
301
+ index = GetIndex(info.b, Z[tau1 - min_size - 1] - Z[tau1 - min_size]);
302
+ while (index) {
303
+ ++info.A[index];
304
+ index /= 2;
305
+ }
306
+ double qa = std::pow(GetQuantile(info.A, quant), alpha);
307
+
308
+ // Update AB tree
309
+ index = GetIndex(info.b, Z[tau1 - 1] - Z[tau1 - min_size - 1]);
310
+ while (index) {
311
+ --info.AB[index];
312
+ index /= 2;
313
+ }
314
+ for (int i = tau1; i < tau2; ++i) {
315
+ index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
316
+ while (index) {
317
+ --info.AB[index];
318
+ index /= 2;
319
+ }
320
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
321
+ while (index) {
322
+ ++info.AB[index];
323
+ index /= 2;
324
+ }
325
+ }
326
+ for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
327
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
328
+ while (index) {
329
+ --info.AB[index];
330
+ index /= 2;
331
+ }
332
+ index = GetIndex(info.b, Z[i] - Z[tau2]);
333
+ while (index) {
334
+ ++info.AB[index];
335
+ index /= 2;
336
+ }
337
+ }
338
+ index = GetIndex(info.b, Z[tau1 - 1] - Z[tau2]);
339
+ while (index) {
340
+ ++info.AB[index];
341
+ index /= 2;
342
+ }
343
+ double qc = std::pow(GetQuantile(info.AB, quant), alpha);
344
+
345
+ // Update B tree
346
+ for (int i = tau1; i < tau1 + min_size - 1; ++i) {
347
+ index = GetIndex(info.b, Z[tau1 + min_size - 1] - Z[i]);
348
+ while (index) {
349
+ ++info.B[index];
350
+ index /= 2;
351
+ }
352
+ index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
353
+ while (index) {
354
+ --info.B[index];
355
+ index /= 2;
356
+ }
357
+ }
358
+ double qb = std::pow(GetQuantile(info.B, quant), alpha);
359
+ // Move tau2 from the end of the time series to the front.
360
+ // Update the statistic value along the way
361
+ tau2 = N;
362
+
363
+ for (; tau2 >= tau1 + min_size; --tau2) {
364
+ index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
365
+ while (index) {
366
+ --info.B[index];
367
+ index /= 2;
368
+ }
369
+ qb = std::pow(GetQuantile(info.B, quant), alpha);
370
+
371
+ double stat = 2 * qc - qa - qb;
372
+ stat *= (double)(tau2 - tau1) * tau1 / tau2;
373
+
374
+ if (stat > info.best_stat) {
375
+ info.best_stat = stat;
376
+ info.best_loc = tau1;
377
+ info.best_t2 = tau2;
378
+ }
379
+ }
380
+ }
@@ -0,0 +1,90 @@
1
+ /*
2
+ Robust estimation of 2[mean(X)-mean(Y)]^2 time normalization factor
3
+ This is the E-Divisive E-statistic when alpha = 2
4
+ Instead of calculating mean(X) we calculate median(X), and similarly for Y
5
+ */
6
+
7
+ #include <algorithm>
8
+ #include <cmath>
9
+ #include <queue>
10
+ #include <vector>
11
+
12
+ void AddToHeaps(std::priority_queue<double, std::vector<double>, std::greater<double>>& m, std::priority_queue<double>& M, double x);
13
+
14
+ double getMedian(const std::priority_queue<double, std::vector<double>, std::greater<double>>& m, const std::priority_queue<double>& M);
15
+
16
+ std::vector<int> EDMX(const std::vector<double>& Z, int min_size = 24, double alpha = 2) {
17
+
18
+ alpha = 2; // Not used, just here for uniform funciton signature
19
+
20
+ std::priority_queue<double> LeftMax;
21
+ std::priority_queue<double, std::vector<double>, std::greater<double>> LeftMin;
22
+
23
+ double stat = -3, stat_best = -3, t1 = 0.0, t2;
24
+ int tau1, tau2;
25
+ int N = Z.size();
26
+ for (int i = 0; i < min_size - 1; ++i)
27
+ AddToHeaps(LeftMin, LeftMax, Z[i]);
28
+
29
+ for (tau1 = min_size; tau1 < N - min_size + 1; ++tau1) { // Iterate over breakout locations
30
+ AddToHeaps(LeftMin, LeftMax, Z[tau1 - 1]);
31
+ std::priority_queue<double> RightMax;
32
+ std::priority_queue<double, std::vector<double>, std::greater<double>> RightMin;
33
+ double medL = getMedian(LeftMin, LeftMax);
34
+
35
+ // Add first set of elements to the heaps for the right segment
36
+ for (std::vector<double>::const_iterator i = Z.begin() + tau1; i != Z.begin() + tau1 + min_size - 1; ++i)
37
+ AddToHeaps(RightMin, RightMax, *i);
38
+
39
+ for (tau2 = tau1 + min_size; tau2 < N + 1; ++tau2) { // Iterate over end of prefix series locations
40
+ AddToHeaps(RightMin, RightMax, Z[tau2 - 1]);
41
+ double medR = getMedian(RightMin, RightMax);
42
+
43
+ stat = std::pow(medL - medR, 2);
44
+ stat *= ((double)tau1 * (tau2 - tau1) / tau2);
45
+
46
+ if (stat > stat_best) {
47
+ t1 = tau1;
48
+ t2 = tau2;
49
+ stat_best = stat;
50
+ }
51
+ }
52
+ }
53
+
54
+ std::vector<int> ret;
55
+ if (stat_best > 0) {
56
+ ret.push_back(t1);
57
+ }
58
+ return ret;
59
+ }
60
+
61
+ // Use 2 heaps to keep track of the median (can also be adjusted for other quantiles). One heap
62
+ // for the "larger" and one heap for the "smaller" observations. Simple to update for streaming
63
+ // data ( O(log n) ) and find median ( O(1) ).
64
+
65
// Read the median off a pair of heaps.
//
// m  min-heap holding the larger observations
// M  max-heap holding the smaller observations
//
// When one heap holds more observations its top is the median; when both hold
// the same number the two tops are averaged. At least one heap must be
// non-empty.
double getMedian(const std::priority_queue<double, std::vector<double>, std::greater<double>>& m, const std::priority_queue<double>& M) {
  const auto upper = m.size();
  const auto lower = M.size();

  if (upper == lower) // There is an even number of observations
    return (m.top() + M.top()) / 2;

  // There is an odd number of observations: the larger heap holds the median
  return upper > lower ? m.top() : M.top();
}
74
+
75
// Add x to the pair of heaps and keep their sizes within one of each other.
//
// m  min-heap holding the larger observations
// M  max-heap holding the smaller observations
// x  observation to add
void AddToHeaps(std::priority_queue<double, std::vector<double>, std::greater<double>>& m, std::priority_queue<double>& M, double x) {
  // Decide on the initial heap: anything below the smallest "large"
  // observation (or anything at all while m is empty) goes to M.
  const bool goes_low = m.empty() || x < m.top();
  if (goes_low)
    M.push(x);
  else
    m.push(x);

  // Make sure that the heaps are balanced: move one top element across
  // when one heap is ahead by more than one.
  if (m.size() > M.size() + 1) {
    const double moved = m.top();
    M.push(moved);
    m.pop();
  } else if (M.size() > m.size() + 1) {
    const double moved = M.top();
    m.push(moved);
    M.pop();
  }
}
@@ -0,0 +1,51 @@
1
+ // rice
2
+ #include <rice/rice.hpp>
3
+ #include <rice/stl.hpp>
4
+
5
+ std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, double beta = 0, int degree = 0);
6
+ std::vector<int> EDM_percent(const std::vector<double>& Z, int min_size = 24, double percent = 0, int degree = 0);
7
+ std::vector<int> EDM_tail(std::vector<double>& Z, int min_size = 24, double alpha = 2, double quant = 0.5);
8
+ std::vector<int> EDMX(const std::vector<double>& Z, int min_size = 24, double alpha = 2);
9
+
10
+ extern "C"
11
+ void Init_ext() {
12
+ auto rb_mBreakout = Rice::define_module("Breakout");
13
+
14
+ rb_mBreakout
15
+ .define_singleton_function(
16
+ "_detect",
17
+ [](std::vector<double> z, int min_size, const std::string& method, double alpha, std::optional<double> beta, int degree, std::optional<double> percent, bool exact) {
18
+ auto minmax = std::minmax_element(z.begin(), z.end());
19
+ auto min = *minmax.first;
20
+ auto max = *minmax.second;
21
+ auto diff = max - min;
22
+ if (diff == 0) {
23
+ // constant series
24
+ return Rice::Array();
25
+ }
26
+ for (auto i = 0; i < z.size(); i++) {
27
+ z[i] = (z[i] - min) / diff;
28
+ }
29
+
30
+ std::vector<int> res;
31
+ if (method == "amoc") {
32
+ if (exact) {
33
+ res = EDMX(z, min_size, alpha);
34
+ } else {
35
+ res = EDM_tail(z, min_size, alpha);
36
+ }
37
+ } else {
38
+ if (percent.has_value()) {
39
+ res = EDM_percent(z, min_size, *percent, degree);
40
+ } else {
41
+ res = EDM_multi(z, min_size, beta.value_or(0.008), degree);
42
+ }
43
+ }
44
+
45
+ auto a = Rice::Array();
46
+ for (auto v : res) {
47
+ a.push(v);
48
+ }
49
+ return a;
50
+ });
51
+ }
@@ -0,0 +1,5 @@
1
# Build script for the native extension: appends the C++17 and optflags
# settings to the compiler flags, then generates the Makefile for the
# "breakout/ext" target.
+ require "mkmf-rice"
2
+
3
+ $CXXFLAGS += " -std=c++17 $(optflags)"
4
+
5
+ create_makefile("breakout/ext")
@@ -0,0 +1,69 @@
1
+ #include <algorithm>
2
+ #include <cmath>
3
+ #include <set>
4
+
5
// Helper functions that ignore or transform their argument.
// NOTE(review): presumably selected by the `degree` option of the
// multi-breakout routines -- confirm against the callers.

// Always 1, whatever the argument.
extern double Linear(double /*x*/) {
  return 1;
}

// Always 0, whatever the argument.
extern double Const(double /*x*/) {
  return 0;
}

// 2x + 1.
extern double Quadratic(double x) {
  return 2 * x + 1;
}
8
+
9
+ /*
10
+ Use 2 multisets (red-black trees) to keep track of the median. One tree for the larger (m) and
11
+ one for the smaller (M) observations. Insertion and deletion in O(log(n)) and find
12
+ the median in O(1), additional memory use is O(n).
13
+ */
14
+
15
+ // insert x into the appropriate tree
16
// Insert x into the appropriate tree and rebalance.
//
// m  ascending multiset holding the larger observations
// M  descending multiset holding the smaller observations
// x  observation to insert
//
// After the call the two sizes differ by at most one.
extern void insert_element(std::multiset<double>& m, std::multiset<double, std::greater<double>>& M, double x) {
  // Anything below the smallest "large" observation (or anything at all
  // while m is empty) belongs with the smaller observations.
  const bool goes_low = m.empty() || x < *m.begin();
  if (goes_low)
    M.insert(x);
  else
    m.insert(x);

  // Move one boundary element across when one tree is ahead by more than one.
  if (m.size() > M.size() + 1) {
    const double moved = *m.begin();
    M.insert(moved);
    m.erase(m.begin());
  } else if (M.size() > m.size() + 1) {
    const double moved = *M.begin();
    m.insert(moved);
    M.erase(M.begin());
  }
}
34
+
35
+ // given a pair of trees obtain the median
36
// Given a pair of trees, obtain the median.
//
// m  ascending multiset holding the larger observations
// M  descending multiset holding the smaller observations
//
// When one tree holds more observations its first element is the median;
// when both hold the same number the two first elements are averaged.
// At least one tree must be non-empty.
extern double get_median(const std::multiset<double>& m, const std::multiset<double, std::greater<double>>& M) {
  const auto upper = m.size();
  const auto lower = M.size();

  if (upper == lower)
    return (*M.begin() + *m.begin()) / 2;

  return upper > lower ? *m.begin() : *M.begin();
}
45
+
46
+ // remove x from the tree, if multiple copies of x exist only remove 1
47
+ // since this method is never called by the user directly it is assumed
48
+ // that there is at least 1 copy of x
49
// Remove one copy of x from the pair of trees and rebalance.
//
// m  ascending multiset holding the larger observations
// M  descending multiset holding the smaller observations
// x  observation to remove; if several copies exist only one is removed
//
// The tree is chosen with the same rule insert_element uses. If x is not
// present in either tree nothing is removed.
extern void remove_element(std::multiset<double>& m, std::multiset<double, std::greater<double>>& M, double x) {
  // m may be empty (e.g. when the only observation lives in M), in which
  // case *m.begin() must not be evaluated.
  const bool prefer_low = m.empty() || x < *m.begin();

  const auto in_low = M.find(x);
  const auto in_high = m.find(x);

  // Never erase an end() iterator: fall back to the other tree, and do
  // nothing when x is absent from both.
  if (prefer_low && in_low != M.end()) {
    M.erase(in_low);
  } else if (in_high != m.end()) {
    m.erase(in_high);
  } else if (in_low != M.end()) {
    M.erase(in_low);
  }

  // Move one boundary element across when one tree is ahead by more than one.
  if (m.size() > M.size() + 1) {
    M.insert(*m.begin());
    m.erase(m.begin());
  } else if (M.size() > m.size() + 1) {
    m.insert(*M.begin());
    M.erase(M.begin());
  }
}
@@ -0,0 +1,13 @@
1
+ #pragma once
2
+
3
+ #include <algorithm>
4
+ #include <cmath>
5
+ #include <set>
6
+
7
+ double get_median(const std::multiset<double>&, const std::multiset<double, std::greater<double>>&);
8
+ void insert_element(std::multiset<double>&, std::multiset<double, std::greater<double>>&, double);
9
+ void remove_element(std::multiset<double>&, std::multiset<double, std::greater<double>>&, double);
10
+
11
+ extern double Linear(double x);
12
+ extern double Const(double x);
13
+ extern double Quadratic(double x);
@@ -0,0 +1,3 @@
1
# Version string of the gem.
+ module Breakout
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1 @@
1
+ require "breakout"
data/lib/breakout.rb ADDED
@@ -0,0 +1,28 @@
1
+ # ext
2
+ require "breakout/ext"
3
+
4
+ # modules
5
+ require "breakout/version"
6
+
7
module Breakout
  # Detects breakouts in a series.
  #
  # series   - Array of values, or a Hash whose values form the series; a Hash
  #            is ordered by key first and the result is reported as keys
  # min_size - minimum segment size (at least 2)
  # method   - "multi" or "amoc"
  # alpha    - number between 0 and 2
  # beta     - optional; cannot be combined with percent
  # degree   - 0, 1 or 2
  # percent  - optional; cannot be combined with beta
  # exact    - passed through to the native routine
  #
  # Returns an Array of positions (for an Array series) or keys (for a Hash
  # series); empty when the series has fewer than min_size entries.
  # Raises ArgumentError when an option is out of range.
  def self.detect(series, min_size: 30, method: "multi", alpha: 2, beta: nil, degree: 1, percent: nil, exact: true)
    raise ArgumentError, "min_size must be at least 2" if min_size < 2
    raise ArgumentError, "beta and percent cannot be passed together" if !beta.nil? && !percent.nil?
    raise ArgumentError, "alpha must be between 0 and 2" if alpha < 0 || alpha > 2
    raise ArgumentError, "degree must be 0, 1, or 2" unless [0, 1, 2].include?(degree)
    raise ArgumentError, "Bad method" unless %w[amoc multi].include?(method)

    return [] if series.size < min_size

    keyed = series.is_a?(Hash)
    if keyed
      pairs = series.sort_by { |key, _| key }
      values = pairs.map(&:last)
    else
      values = series
    end

    locations = _detect(values, min_size, method, alpha, beta, degree, percent, exact)
    # Translate positions back into the Hash's keys
    locations.map! { |position| pairs[position][0] } if keyed
    locations
  end
end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: breakout-detection
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-09-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rice
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 4.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 4.0.2
27
+ description:
28
+ email: andrew@ankane.org
29
+ executables: []
30
+ extensions:
31
+ - ext/breakout/extconf.rb
32
+ extra_rdoc_files: []
33
+ files:
34
+ - CHANGELOG.md
35
+ - LICENSE.txt
36
+ - README.md
37
+ - ext/breakout/edm_multi.cpp
38
+ - ext/breakout/edm_percent.cpp
39
+ - ext/breakout/edm_tail.cpp
40
+ - ext/breakout/edmx.cpp
41
+ - ext/breakout/ext.cpp
42
+ - ext/breakout/extconf.rb
43
+ - ext/breakout/helper.cpp
44
+ - ext/breakout/helper.h
45
+ - lib/breakout-detection.rb
46
+ - lib/breakout.rb
47
+ - lib/breakout/version.rb
48
+ homepage: https://github.com/ankane/breakout
49
+ licenses:
50
+ - GPL-2.0-or-later
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '2.6'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubygems_version: 3.2.22
68
+ signing_key:
69
+ specification_version: 4
70
+ summary: Breakout detection for Ruby
71
+ test_files: []