breakout-detection 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +340 -0
- data/README.md +92 -0
- data/ext/breakout/edm_multi.cpp +93 -0
- data/ext/breakout/edm_percent.cpp +105 -0
- data/ext/breakout/edm_tail.cpp +380 -0
- data/ext/breakout/edmx.cpp +90 -0
- data/ext/breakout/ext.cpp +51 -0
- data/ext/breakout/extconf.rb +5 -0
- data/ext/breakout/helper.cpp +69 -0
- data/ext/breakout/helper.h +13 -0
- data/lib/breakout/version.rb +3 -0
- data/lib/breakout-detection.rb +1 -0
- data/lib/breakout.rb +28 -0
- metadata +71 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8f81b5a2a9781787903f293322eef1ff6d08479cde2df25eea47c3023cd501bd
|
4
|
+
data.tar.gz: 1839625455de87fca184eb2f1fdf8bc7a2e8809c8afd17b9fc9ec61d6d775b7a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1fa14ae2cc9db547bdeb13a3232d87c96e239aa6dde618ea6b7fa9c53aaca5596b45223c3e7eeceb9a8aa0428de77c54c4e8275adf57b9767c13a889538981bb
|
7
|
+
data.tar.gz: 4ba5ee1decd2ab9084c3d377ec6d37a743c7f5ff47bb48eb55bdd5ecb9bce57df534522643c448b00a35cdd39d1a429ebc1c036440b079046c321bbef5e193a1
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,340 @@
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
2
|
+
Version 2, June 1991
|
3
|
+
|
4
|
+
Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
|
5
|
+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
6
|
+
Everyone is permitted to copy and distribute verbatim copies
|
7
|
+
of this license document, but changing it is not allowed.
|
8
|
+
|
9
|
+
Preamble
|
10
|
+
|
11
|
+
The licenses for most software are designed to take away your
|
12
|
+
freedom to share and change it. By contrast, the GNU General Public
|
13
|
+
License is intended to guarantee your freedom to share and change free
|
14
|
+
software--to make sure the software is free for all its users. This
|
15
|
+
General Public License applies to most of the Free Software
|
16
|
+
Foundation's software and to any other program whose authors commit to
|
17
|
+
using it. (Some other Free Software Foundation software is covered by
|
18
|
+
the GNU Lesser General Public License instead.) You can apply it to
|
19
|
+
your programs, too.
|
20
|
+
|
21
|
+
When we speak of free software, we are referring to freedom, not
|
22
|
+
price. Our General Public Licenses are designed to make sure that you
|
23
|
+
have the freedom to distribute copies of free software (and charge for
|
24
|
+
this service if you wish), that you receive source code or can get it
|
25
|
+
if you want it, that you can change the software or use pieces of it
|
26
|
+
in new free programs; and that you know you can do these things.
|
27
|
+
|
28
|
+
To protect your rights, we need to make restrictions that forbid
|
29
|
+
anyone to deny you these rights or to ask you to surrender the rights.
|
30
|
+
These restrictions translate to certain responsibilities for you if you
|
31
|
+
distribute copies of the software, or if you modify it.
|
32
|
+
|
33
|
+
For example, if you distribute copies of such a program, whether
|
34
|
+
gratis or for a fee, you must give the recipients all the rights that
|
35
|
+
you have. You must make sure that they, too, receive or can get the
|
36
|
+
source code. And you must show them these terms so they know their
|
37
|
+
rights.
|
38
|
+
|
39
|
+
We protect your rights with two steps: (1) copyright the software, and
|
40
|
+
(2) offer you this license which gives you legal permission to copy,
|
41
|
+
distribute and/or modify the software.
|
42
|
+
|
43
|
+
Also, for each author's protection and ours, we want to make certain
|
44
|
+
that everyone understands that there is no warranty for this free
|
45
|
+
software. If the software is modified by someone else and passed on, we
|
46
|
+
want its recipients to know that what they have is not the original, so
|
47
|
+
that any problems introduced by others will not reflect on the original
|
48
|
+
authors' reputations.
|
49
|
+
|
50
|
+
Finally, any free program is threatened constantly by software
|
51
|
+
patents. We wish to avoid the danger that redistributors of a free
|
52
|
+
program will individually obtain patent licenses, in effect making the
|
53
|
+
program proprietary. To prevent this, we have made it clear that any
|
54
|
+
patent must be licensed for everyone's free use or not licensed at all.
|
55
|
+
|
56
|
+
The precise terms and conditions for copying, distribution and
|
57
|
+
modification follow.
|
58
|
+
|
59
|
+
GNU GENERAL PUBLIC LICENSE
|
60
|
+
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
61
|
+
|
62
|
+
0. This License applies to any program or other work which contains
|
63
|
+
a notice placed by the copyright holder saying it may be distributed
|
64
|
+
under the terms of this General Public License. The "Program", below,
|
65
|
+
refers to any such program or work, and a "work based on the Program"
|
66
|
+
means either the Program or any derivative work under copyright law:
|
67
|
+
that is to say, a work containing the Program or a portion of it,
|
68
|
+
either verbatim or with modifications and/or translated into another
|
69
|
+
language. (Hereinafter, translation is included without limitation in
|
70
|
+
the term "modification".) Each licensee is addressed as "you".
|
71
|
+
|
72
|
+
Activities other than copying, distribution and modification are not
|
73
|
+
covered by this License; they are outside its scope. The act of
|
74
|
+
running the Program is not restricted, and the output from the Program
|
75
|
+
is covered only if its contents constitute a work based on the
|
76
|
+
Program (independent of having been made by running the Program).
|
77
|
+
Whether that is true depends on what the Program does.
|
78
|
+
|
79
|
+
1. You may copy and distribute verbatim copies of the Program's
|
80
|
+
source code as you receive it, in any medium, provided that you
|
81
|
+
conspicuously and appropriately publish on each copy an appropriate
|
82
|
+
copyright notice and disclaimer of warranty; keep intact all the
|
83
|
+
notices that refer to this License and to the absence of any warranty;
|
84
|
+
and give any other recipients of the Program a copy of this License
|
85
|
+
along with the Program.
|
86
|
+
|
87
|
+
You may charge a fee for the physical act of transferring a copy, and
|
88
|
+
you may at your option offer warranty protection in exchange for a fee.
|
89
|
+
|
90
|
+
2. You may modify your copy or copies of the Program or any portion
|
91
|
+
of it, thus forming a work based on the Program, and copy and
|
92
|
+
distribute such modifications or work under the terms of Section 1
|
93
|
+
above, provided that you also meet all of these conditions:
|
94
|
+
|
95
|
+
a) You must cause the modified files to carry prominent notices
|
96
|
+
stating that you changed the files and the date of any change.
|
97
|
+
|
98
|
+
b) You must cause any work that you distribute or publish, that in
|
99
|
+
whole or in part contains or is derived from the Program or any
|
100
|
+
part thereof, to be licensed as a whole at no charge to all third
|
101
|
+
parties under the terms of this License.
|
102
|
+
|
103
|
+
c) If the modified program normally reads commands interactively
|
104
|
+
when run, you must cause it, when started running for such
|
105
|
+
interactive use in the most ordinary way, to print or display an
|
106
|
+
announcement including an appropriate copyright notice and a
|
107
|
+
notice that there is no warranty (or else, saying that you provide
|
108
|
+
a warranty) and that users may redistribute the program under
|
109
|
+
these conditions, and telling the user how to view a copy of this
|
110
|
+
License. (Exception: if the Program itself is interactive but
|
111
|
+
does not normally print such an announcement, your work based on
|
112
|
+
the Program is not required to print an announcement.)
|
113
|
+
|
114
|
+
These requirements apply to the modified work as a whole. If
|
115
|
+
identifiable sections of that work are not derived from the Program,
|
116
|
+
and can be reasonably considered independent and separate works in
|
117
|
+
themselves, then this License, and its terms, do not apply to those
|
118
|
+
sections when you distribute them as separate works. But when you
|
119
|
+
distribute the same sections as part of a whole which is a work based
|
120
|
+
on the Program, the distribution of the whole must be on the terms of
|
121
|
+
this License, whose permissions for other licensees extend to the
|
122
|
+
entire whole, and thus to each and every part regardless of who wrote it.
|
123
|
+
|
124
|
+
Thus, it is not the intent of this section to claim rights or contest
|
125
|
+
your rights to work written entirely by you; rather, the intent is to
|
126
|
+
exercise the right to control the distribution of derivative or
|
127
|
+
collective works based on the Program.
|
128
|
+
|
129
|
+
In addition, mere aggregation of another work not based on the Program
|
130
|
+
with the Program (or with a work based on the Program) on a volume of
|
131
|
+
a storage or distribution medium does not bring the other work under
|
132
|
+
the scope of this License.
|
133
|
+
|
134
|
+
3. You may copy and distribute the Program (or a work based on it,
|
135
|
+
under Section 2) in object code or executable form under the terms of
|
136
|
+
Sections 1 and 2 above provided that you also do one of the following:
|
137
|
+
|
138
|
+
a) Accompany it with the complete corresponding machine-readable
|
139
|
+
source code, which must be distributed under the terms of Sections
|
140
|
+
1 and 2 above on a medium customarily used for software interchange; or,
|
141
|
+
|
142
|
+
b) Accompany it with a written offer, valid for at least three
|
143
|
+
years, to give any third party, for a charge no more than your
|
144
|
+
cost of physically performing source distribution, a complete
|
145
|
+
machine-readable copy of the corresponding source code, to be
|
146
|
+
distributed under the terms of Sections 1 and 2 above on a medium
|
147
|
+
customarily used for software interchange; or,
|
148
|
+
|
149
|
+
c) Accompany it with the information you received as to the offer
|
150
|
+
to distribute corresponding source code. (This alternative is
|
151
|
+
allowed only for noncommercial distribution and only if you
|
152
|
+
received the program in object code or executable form with such
|
153
|
+
an offer, in accord with Subsection b above.)
|
154
|
+
|
155
|
+
The source code for a work means the preferred form of the work for
|
156
|
+
making modifications to it. For an executable work, complete source
|
157
|
+
code means all the source code for all modules it contains, plus any
|
158
|
+
associated interface definition files, plus the scripts used to
|
159
|
+
control compilation and installation of the executable. However, as a
|
160
|
+
special exception, the source code distributed need not include
|
161
|
+
anything that is normally distributed (in either source or binary
|
162
|
+
form) with the major components (compiler, kernel, and so on) of the
|
163
|
+
operating system on which the executable runs, unless that component
|
164
|
+
itself accompanies the executable.
|
165
|
+
|
166
|
+
If distribution of executable or object code is made by offering
|
167
|
+
access to copy from a designated place, then offering equivalent
|
168
|
+
access to copy the source code from the same place counts as
|
169
|
+
distribution of the source code, even though third parties are not
|
170
|
+
compelled to copy the source along with the object code.
|
171
|
+
|
172
|
+
4. You may not copy, modify, sublicense, or distribute the Program
|
173
|
+
except as expressly provided under this License. Any attempt
|
174
|
+
otherwise to copy, modify, sublicense or distribute the Program is
|
175
|
+
void, and will automatically terminate your rights under this License.
|
176
|
+
However, parties who have received copies, or rights, from you under
|
177
|
+
this License will not have their licenses terminated so long as such
|
178
|
+
parties remain in full compliance.
|
179
|
+
|
180
|
+
5. You are not required to accept this License, since you have not
|
181
|
+
signed it. However, nothing else grants you permission to modify or
|
182
|
+
distribute the Program or its derivative works. These actions are
|
183
|
+
prohibited by law if you do not accept this License. Therefore, by
|
184
|
+
modifying or distributing the Program (or any work based on the
|
185
|
+
Program), you indicate your acceptance of this License to do so, and
|
186
|
+
all its terms and conditions for copying, distributing or modifying
|
187
|
+
the Program or works based on it.
|
188
|
+
|
189
|
+
6. Each time you redistribute the Program (or any work based on the
|
190
|
+
Program), the recipient automatically receives a license from the
|
191
|
+
original licensor to copy, distribute or modify the Program subject to
|
192
|
+
these terms and conditions. You may not impose any further
|
193
|
+
restrictions on the recipients' exercise of the rights granted herein.
|
194
|
+
You are not responsible for enforcing compliance by third parties to
|
195
|
+
this License.
|
196
|
+
|
197
|
+
7. If, as a consequence of a court judgment or allegation of patent
|
198
|
+
infringement or for any other reason (not limited to patent issues),
|
199
|
+
conditions are imposed on you (whether by court order, agreement or
|
200
|
+
otherwise) that contradict the conditions of this License, they do not
|
201
|
+
excuse you from the conditions of this License. If you cannot
|
202
|
+
distribute so as to satisfy simultaneously your obligations under this
|
203
|
+
License and any other pertinent obligations, then as a consequence you
|
204
|
+
may not distribute the Program at all. For example, if a patent
|
205
|
+
license would not permit royalty-free redistribution of the Program by
|
206
|
+
all those who receive copies directly or indirectly through you, then
|
207
|
+
the only way you could satisfy both it and this License would be to
|
208
|
+
refrain entirely from distribution of the Program.
|
209
|
+
|
210
|
+
If any portion of this section is held invalid or unenforceable under
|
211
|
+
any particular circumstance, the balance of the section is intended to
|
212
|
+
apply and the section as a whole is intended to apply in other
|
213
|
+
circumstances.
|
214
|
+
|
215
|
+
It is not the purpose of this section to induce you to infringe any
|
216
|
+
patents or other property right claims or to contest validity of any
|
217
|
+
such claims; this section has the sole purpose of protecting the
|
218
|
+
integrity of the free software distribution system, which is
|
219
|
+
implemented by public license practices. Many people have made
|
220
|
+
generous contributions to the wide range of software distributed
|
221
|
+
through that system in reliance on consistent application of that
|
222
|
+
system; it is up to the author/donor to decide if he or she is willing
|
223
|
+
to distribute software through any other system and a licensee cannot
|
224
|
+
impose that choice.
|
225
|
+
|
226
|
+
This section is intended to make thoroughly clear what is believed to
|
227
|
+
be a consequence of the rest of this License.
|
228
|
+
|
229
|
+
8. If the distribution and/or use of the Program is restricted in
|
230
|
+
certain countries either by patents or by copyrighted interfaces, the
|
231
|
+
original copyright holder who places the Program under this License
|
232
|
+
may add an explicit geographical distribution limitation excluding
|
233
|
+
those countries, so that distribution is permitted only in or among
|
234
|
+
countries not thus excluded. In such case, this License incorporates
|
235
|
+
the limitation as if written in the body of this License.
|
236
|
+
|
237
|
+
9. The Free Software Foundation may publish revised and/or new versions
|
238
|
+
of the General Public License from time to time. Such new versions will
|
239
|
+
be similar in spirit to the present version, but may differ in detail to
|
240
|
+
address new problems or concerns.
|
241
|
+
|
242
|
+
Each version is given a distinguishing version number. If the Program
|
243
|
+
specifies a version number of this License which applies to it and "any
|
244
|
+
later version", you have the option of following the terms and conditions
|
245
|
+
either of that version or of any later version published by the Free
|
246
|
+
Software Foundation. If the Program does not specify a version number of
|
247
|
+
this License, you may choose any version ever published by the Free Software
|
248
|
+
Foundation.
|
249
|
+
|
250
|
+
10. If you wish to incorporate parts of the Program into other free
|
251
|
+
programs whose distribution conditions are different, write to the author
|
252
|
+
to ask for permission. For software which is copyrighted by the Free
|
253
|
+
Software Foundation, write to the Free Software Foundation; we sometimes
|
254
|
+
make exceptions for this. Our decision will be guided by the two goals
|
255
|
+
of preserving the free status of all derivatives of our free software and
|
256
|
+
of promoting the sharing and reuse of software generally.
|
257
|
+
|
258
|
+
NO WARRANTY
|
259
|
+
|
260
|
+
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
261
|
+
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
262
|
+
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
263
|
+
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
264
|
+
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
265
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
266
|
+
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
267
|
+
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
268
|
+
REPAIR OR CORRECTION.
|
269
|
+
|
270
|
+
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
271
|
+
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
272
|
+
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
273
|
+
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
274
|
+
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
275
|
+
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
276
|
+
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
277
|
+
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
278
|
+
POSSIBILITY OF SUCH DAMAGES.
|
279
|
+
|
280
|
+
END OF TERMS AND CONDITIONS
|
281
|
+
|
282
|
+
How to Apply These Terms to Your New Programs
|
283
|
+
|
284
|
+
If you develop a new program, and you want it to be of the greatest
|
285
|
+
possible use to the public, the best way to achieve this is to make it
|
286
|
+
free software which everyone can redistribute and change under these terms.
|
287
|
+
|
288
|
+
To do so, attach the following notices to the program. It is safest
|
289
|
+
to attach them to the start of each source file to most effectively
|
290
|
+
convey the exclusion of warranty; and each file should have at least
|
291
|
+
the "copyright" line and a pointer to where the full notice is found.
|
292
|
+
|
293
|
+
{description}
|
294
|
+
Copyright (C) {year} {fullname}
|
295
|
+
|
296
|
+
This program is free software; you can redistribute it and/or modify
|
297
|
+
it under the terms of the GNU General Public License as published by
|
298
|
+
the Free Software Foundation; either version 2 of the License, or
|
299
|
+
(at your option) any later version.
|
300
|
+
|
301
|
+
This program is distributed in the hope that it will be useful,
|
302
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
303
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
304
|
+
GNU General Public License for more details.
|
305
|
+
|
306
|
+
You should have received a copy of the GNU General Public License along
|
307
|
+
with this program; if not, write to the Free Software Foundation, Inc.,
|
308
|
+
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
309
|
+
|
310
|
+
Also add information on how to contact you by electronic and paper mail.
|
311
|
+
|
312
|
+
If the program is interactive, make it output a short notice like this
|
313
|
+
when it starts in an interactive mode:
|
314
|
+
|
315
|
+
Gnomovision version 69, Copyright (C) year name of author
|
316
|
+
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
317
|
+
This is free software, and you are welcome to redistribute it
|
318
|
+
under certain conditions; type `show c' for details.
|
319
|
+
|
320
|
+
The hypothetical commands `show w' and `show c' should show the appropriate
|
321
|
+
parts of the General Public License. Of course, the commands you use may
|
322
|
+
be called something other than `show w' and `show c'; they could even be
|
323
|
+
mouse-clicks or menu items--whatever suits your program.
|
324
|
+
|
325
|
+
You should also get your employer (if you work as a programmer) or your
|
326
|
+
school, if any, to sign a "copyright disclaimer" for the program, if
|
327
|
+
necessary. Here is a sample; alter the names:
|
328
|
+
|
329
|
+
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
330
|
+
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
331
|
+
|
332
|
+
{signature of Ty Coon}, 1 April 1989
|
333
|
+
Ty Coon, President of Vice
|
334
|
+
|
335
|
+
This General Public License does not permit incorporating your program into
|
336
|
+
proprietary programs. If your program is a subroutine library, you may
|
337
|
+
consider it more useful to permit linking proprietary applications with the
|
338
|
+
library. If this is what you want to do, use the GNU Lesser General
|
339
|
+
Public License instead of this License.
|
340
|
+
|
data/README.md
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# Breakout
|
2
|
+
|
3
|
+
:fire: [BreakoutDetection](https://github.com/twitter/BreakoutDetection) for Ruby
|
4
|
+
|
5
|
+
Learn more about [how it works](https://blog.twitter.com/engineering/en_us/a/2014/breakout-detection-in-the-wild)
|
6
|
+
|
7
|
+
[![Build Status](https://github.com/ankane/breakout/workflows/build/badge.svg?branch=master)](https://github.com/ankane/breakout/actions)
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application’s Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'breakout-detection'
|
15
|
+
```
|
16
|
+
|
17
|
+
## Getting Started
|
18
|
+
|
19
|
+
Detect breakouts in a time series
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
series = {
|
23
|
+
Date.parse("2020-01-01") => 100,
|
24
|
+
Date.parse("2020-01-02") => 150,
|
25
|
+
Date.parse("2020-01-03") => 136,
|
26
|
+
# ...
|
27
|
+
}
|
28
|
+
|
29
|
+
Breakout.detect(series)
|
30
|
+
```
|
31
|
+
|
32
|
+
Works great with [Groupdate](https://github.com/ankane/groupdate)
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
series = User.group_by_day(:created_at).count
|
36
|
+
Breakout.detect(series)
|
37
|
+
```
|
38
|
+
|
39
|
+
Series can also be an array without times (the index is returned)
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
series = [100, 150, 136, ...]
|
43
|
+
Breakout.detect(series)
|
44
|
+
```
|
45
|
+
|
46
|
+
## Options
|
47
|
+
|
48
|
+
Pass options - default values below
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
Breakout.detect(
|
52
|
+
series,
|
53
|
+
min_size: 30, # minimum observations between breakouts
|
54
|
+
method: "multi", # multi or amoc (at most one change)
|
55
|
+
degree: 1, # degree of the penalization polynomial (multi only)
|
56
|
+
beta: 0.008, # penalization term (multi only)
|
57
|
+
percent: nil, # minimum percent change in goodness of fit statistic (multi only)
|
58
|
+
alpha: 2, # weight of the distance between observations (amoc only)
|
59
|
+
exact: true # exact or approximate median (amoc only)
|
60
|
+
)
|
61
|
+
```
|
62
|
+
|
63
|
+
## Credits
|
64
|
+
|
65
|
+
This library uses the C++ code from the [BreakoutDetection](https://github.com/twitter/BreakoutDetection) R package and is available under the same license.
|
66
|
+
|
67
|
+
## References
|
68
|
+
|
69
|
+
- [Leveraging Cloud Data to Mitigate User Experience from ‘Breaking Bad’](https://arxiv.org/abs/1411.7955)
|
70
|
+
|
71
|
+
## History
|
72
|
+
|
73
|
+
View the [changelog](https://github.com/ankane/breakout/blob/master/CHANGELOG.md)
|
74
|
+
|
75
|
+
## Contributing
|
76
|
+
|
77
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
78
|
+
|
79
|
+
- [Report bugs](https://github.com/ankane/breakout/issues)
|
80
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/breakout/pulls)
|
81
|
+
- Write, clarify, or fix documentation
|
82
|
+
- Suggest or add new features
|
83
|
+
|
84
|
+
To get started with development:
|
85
|
+
|
86
|
+
```sh
|
87
|
+
git clone https://github.com/ankane/breakout.git
|
88
|
+
cd breakout
|
89
|
+
bundle install
|
90
|
+
bundle exec rake compile
|
91
|
+
bundle exec rake test
|
92
|
+
```
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#include <algorithm>
|
2
|
+
#include <cmath>
|
3
|
+
#include <set>
|
4
|
+
#include <vector>
|
5
|
+
#include "helper.h"
|
6
|
+
|
7
|
+
// Z: time series
|
8
|
+
// min_size: minimum segment size
|
9
|
+
// beta: penalization term for the addition of a change point
|
10
|
+
|
11
|
+
std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, double beta = 0, int degree = 0) {
|
12
|
+
|
13
|
+
// identify which type of penalization to use
|
14
|
+
double (*G)(double);
|
15
|
+
switch (degree) {
|
16
|
+
case 1:
|
17
|
+
G = Linear;
|
18
|
+
break;
|
19
|
+
case 2:
|
20
|
+
G = Quadratic;
|
21
|
+
break;
|
22
|
+
default:
|
23
|
+
G = Const;
|
24
|
+
break;
|
25
|
+
}
|
26
|
+
|
27
|
+
int n = Z.size();
|
28
|
+
if (beta < 0) // assume that beta is a positive number
|
29
|
+
beta = -beta;
|
30
|
+
std::vector<int> prev(n + 1, 0); // store optimal location of previous change point
|
31
|
+
std::vector<int> number(n + 1, 0); // store the number of change points in optimal segmentation
|
32
|
+
std::vector<double> F(n + 1, -3); // store optimal statistic value
|
33
|
+
// F[s] is calculated using observations { Z[0], Z[1], ..., Z[s-1] }
|
34
|
+
|
35
|
+
// trees used to store the "upper half" of the considered observations
|
36
|
+
std::multiset<double> right_min, left_min;
|
37
|
+
// trees used to store the "lower half" of the considered observations
|
38
|
+
std::multiset<double, std::greater<double>> right_max, left_max;
|
39
|
+
|
40
|
+
// Iterate over possible locations for the last change
|
41
|
+
for (int s = 2 * min_size; s < n + 1; ++s) {
|
42
|
+
right_max.clear();
|
43
|
+
right_min.clear(); // clear right trees
|
44
|
+
left_max.clear();
|
45
|
+
left_min.clear(); // clear left trees
|
46
|
+
|
47
|
+
// initialize left and right trees to account for minimum segment size
|
48
|
+
for (int i = prev[min_size - 1]; i < min_size - 1; ++i)
|
49
|
+
insert_element(left_min, left_max, Z[i]);
|
50
|
+
for (int i = min_size - 1; i < s; ++i)
|
51
|
+
insert_element(right_min, right_max, Z[i]);
|
52
|
+
|
53
|
+
// Iterate over possible locations for the penultiamte change
|
54
|
+
for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
|
55
|
+
insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
|
56
|
+
remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
|
57
|
+
// left tree now has { Z[prev[t-1]], ..., Z[t-1] }
|
58
|
+
// right tree now has { Z[t], ..., Z[s-1] }
|
59
|
+
|
60
|
+
// check to see if optimal position of previous change point has changed
|
61
|
+
// if so update the left tree
|
62
|
+
if (prev[t] > prev[t - 1]) {
|
63
|
+
for (int i = prev[t - 1]; i < prev[t]; ++i)
|
64
|
+
remove_element(left_min, left_max, Z[i]);
|
65
|
+
} else if (prev[t] < prev[t - 1]) {
|
66
|
+
for (int i = prev[t]; i < prev[t - 1]; ++i)
|
67
|
+
insert_element(left_min, left_max, Z[i]);
|
68
|
+
}
|
69
|
+
|
70
|
+
// calculate statistic value
|
71
|
+
double left_median = get_median(left_min, left_max), right_median = get_median(right_min, right_max);
|
72
|
+
double normalize = ((t - prev[t]) * (s - t)) / (std::pow((double)(s - prev[t]), 2.0));
|
73
|
+
double tmp = F[t] + normalize * std::pow(left_median - right_median, 2.0) - beta * G(number[t]);
|
74
|
+
// check for improved optimal statistic value
|
75
|
+
if (tmp > F[s]) {
|
76
|
+
number[s] = number[t] + 1;
|
77
|
+
F[s] = tmp;
|
78
|
+
prev[s] = t;
|
79
|
+
}
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
// obtain list of optimal change point estimates
|
84
|
+
std::vector<int> ret;
|
85
|
+
int at = n;
|
86
|
+
while (at) {
|
87
|
+
if (prev[at]) // don't insert 0 as a change point estimate
|
88
|
+
ret.push_back(prev[at]);
|
89
|
+
at = prev[at];
|
90
|
+
}
|
91
|
+
sort(ret.begin(), ret.end());
|
92
|
+
return ret;
|
93
|
+
}
|
@@ -0,0 +1,105 @@
|
|
1
|
+
/*
|
2
|
+
Penalizes based on percent change in the statistic value.
|
3
|
+
Linear penalty means that each new breakout must result in at least an X% increase
|
4
|
+
Quadratic penalty means that each new breakout must result in at least an (X*k)% increase for k breakouts
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include <algorithm>
|
8
|
+
#include <cmath>
|
9
|
+
#include <set>
|
10
|
+
#include <vector>
|
11
|
+
#include "helper.h"
|
12
|
+
|
13
|
+
std::vector<int> EDM_percent(const std::vector<double>& Z, int min_size = 24, double percent = 0, int degree = 0) {
|
14
|
+
// Z: time series
|
15
|
+
// min_size: minimum segment size
|
16
|
+
// beta: penalization term for the addition of a change point
|
17
|
+
|
18
|
+
// identify which type of penalization to use
|
19
|
+
double (*G)(double);
|
20
|
+
switch (degree) {
|
21
|
+
case 1:
|
22
|
+
G = Linear;
|
23
|
+
break;
|
24
|
+
case 2:
|
25
|
+
G = Quadratic;
|
26
|
+
break;
|
27
|
+
default:
|
28
|
+
G = Const;
|
29
|
+
break;
|
30
|
+
}
|
31
|
+
|
32
|
+
int n = Z.size();
|
33
|
+
|
34
|
+
std::vector<int> prev(n + 1, 0); // store optimal location of previous change point
|
35
|
+
std::vector<int> number(n + 1, 0); // store the number of change points in optimal segmentation
|
36
|
+
std::vector<double> F(n + 1, 0); // store optimal statistic value
|
37
|
+
// F[s] is calculated using observations { Z[0], Z[1], ..., Z[s-1] }
|
38
|
+
|
39
|
+
// trees used to store the "upper half" of the considered observations
|
40
|
+
std::multiset<double> right_min, left_min;
|
41
|
+
// trees used to store the "lower half" of the considered observations
|
42
|
+
std::multiset<double, std::greater<double>> right_max, left_max;
|
43
|
+
|
44
|
+
// Iterate over possible locations for the last change
|
45
|
+
for (int s = 2 * min_size; s < n + 1; ++s) {
|
46
|
+
right_max.clear();
|
47
|
+
right_min.clear(); // clear right trees
|
48
|
+
left_max.clear();
|
49
|
+
left_min.clear(); // clear left trees
|
50
|
+
|
51
|
+
// initialize left and right trees to account for minimum segment size
|
52
|
+
for (int i = prev[min_size - 1]; i < min_size - 1; ++i)
|
53
|
+
insert_element(left_min, left_max, Z[i]);
|
54
|
+
for (int i = min_size - 1; i < s; ++i)
|
55
|
+
insert_element(right_min, right_max, Z[i]);
|
56
|
+
|
57
|
+
// Iterate over possible locations for the penultimate change
|
58
|
+
for (int t = min_size; t < s - min_size + 1; ++t) { // modify limits to deal with min_size
|
59
|
+
insert_element(left_min, left_max, Z[t - 1]); // insert element into left tree
|
60
|
+
remove_element(right_min, right_max, Z[t - 1]); // remove element from right tree
|
61
|
+
// left tree now has { Z[prev[t-1]], ..., Z[t-1] }
|
62
|
+
// right tree now has { Z[t], ..., Z[s-1] }
|
63
|
+
|
64
|
+
// check to see if optimal position of previous change point has changed
|
65
|
+
// if so update the left tree
|
66
|
+
if (prev[t] > prev[t - 1]) {
|
67
|
+
for (int i = prev[t - 1]; i < prev[t]; ++i)
|
68
|
+
remove_element(left_min, left_max, Z[i]);
|
69
|
+
} else if (prev[t] < prev[t - 1]) {
|
70
|
+
for (int i = prev[t]; i < prev[t - 1]; ++i)
|
71
|
+
insert_element(left_min, left_max, Z[i]);
|
72
|
+
}
|
73
|
+
|
74
|
+
// calculate statistic value
|
75
|
+
double left_median = get_median(left_min, left_max), right_median = get_median(right_min, right_max);
|
76
|
+
double normalize = ((t - prev[t]) * (s - t)) / (std::pow(static_cast<double>(s - prev[t]), 2));
|
77
|
+
double tmp = F[t] + normalize * std::pow(static_cast<double>(left_median - right_median), 2);
|
78
|
+
// Find best location for change point. check % condition later
|
79
|
+
if (tmp > F[s]) {
|
80
|
+
number[s] = number[t] + 1;
|
81
|
+
F[s] = tmp;
|
82
|
+
prev[s] = t;
|
83
|
+
}
|
84
|
+
}
|
85
|
+
// check to make sure we meet the percent change requirement
|
86
|
+
if (prev[s]) {
|
87
|
+
if (F[s] - F[prev[s]] < percent * G(number[prev[s]]) * F[prev[s]]) {
|
88
|
+
number[s] = number[prev[s]];
|
89
|
+
F[s] = F[prev[s]];
|
90
|
+
prev[s] = prev[prev[s]];
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
// obtain list of optimal change point estimates
|
96
|
+
std::vector<int> ret;
|
97
|
+
int at = n;
|
98
|
+
while (at) {
|
99
|
+
if (prev[at]) // don't insert 0 as a change point estimate
|
100
|
+
ret.push_back(prev[at]);
|
101
|
+
at = prev[at];
|
102
|
+
}
|
103
|
+
sort(ret.begin(), ret.end());
|
104
|
+
return ret;
|
105
|
+
}
|
@@ -0,0 +1,380 @@
|
|
1
|
+
/*
|
2
|
+
This version calculates the between distance using the delta points around the change point estimate.
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include <algorithm>
|
6
|
+
#include <cmath>
|
7
|
+
#include <iostream>
|
8
|
+
#include <vector>
|
9
|
+
|
10
|
+
// Class used to hold all the information about the
|
11
|
+
// breakout location and the interval trees
|
12
|
+
// State shared by EDM_tail, ForwardUpdate and BackwardUpdate.
//
// A, B and AB are count trees stored as flat arrays of size 2^(b + 1):
// the root is at index 1, the children of node i are 2i and 2i + 1, and
// index 0 is never used. Each holds counts of absolute differences
// between observations: A for pairs inside the left segment, B for pairs
// inside the right segment, AB for pairs that straddle the two.
struct Information {
  std::vector<double> A, B, AB;
  double best_stat;      // largest statistic value seen so far
  int best_loc, best_t2; // tau1 / tau2 at which best_stat was observed
  int min_size, b;       // minimum segment length, tree depth

  Information(int, int);
};

// bb is the tree depth (each tree gets 2^(bb + 1) zeroed slots) and m is the
// minimum segment size. The "best" fields start at the sentinel value -3.
Information::Information(int bb, int m)
    : A(1 << (bb + 1)),
      B(1 << (bb + 1)),
      AB(1 << (bb + 1)),
      best_stat(-3),
      best_loc(-3),
      best_t2(-3),
      min_size(m),
      b(bb) {}
|
29
|
+
|
30
|
+
void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha);
|
31
|
+
void ForwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha);
|
32
|
+
|
33
|
+
// Return the array index of the leaf whose interval contains |x|.
//
// The tree has 2^B leaves stored at indices [2^B, 2^(B + 1) - 1]; leaf k
// (1-based) covers the interval ((k - 1) / 2^B, k / 2^B].
//
// B: depth of the tree.
// x: a difference between two observations; only its magnitude is used.
//    The extension rescales the series to [0, 1] before calling, so |x| is
//    expected to lie in [0, 1].
int GetIndex(int B, double x) {
  const int leaves = 1 << B;
  double leaf = std::ceil(std::abs(x) * leaves);
  // ceil(0) is 0, which would yield index 2^B - 1: an internal node, not a
  // leaf. Put a zero difference into the first leaf instead. The negated
  // comparison also catches NaN, which must not be cast to int.
  if (!(leaf >= 1))
    leaf = 1;
  // Keep out-of-range magnitudes inside the array by using the last leaf.
  if (leaf > leaves)
    leaf = leaves;
  return (int)leaf + leaves - 1;
}
|
37
|
+
|
38
|
+
// Approximate the `quant` quantile of the values counted in the tree `x`.
//
// x is a flat count tree: x[1] is the total count, node i has children 2i
// and 2i + 1, and x[0] is unused. The walk starts at the root with the value
// range [0, 1] and halves that range each time it steps to a child.
double GetQuantile(std::vector<double>& x, double quant) {
  const int size = x.size();
  int rank = std::ceil(x[1] * quant); // rank still to be located below `node`
  double lo = 0, hi = 1;              // value range covered by `node`
  int node = 1;

  // Stop as soon as the current node has no children inside the array.
  for (int left = 2; node < size && left < size; left = node << 1) {
    const double mid = (lo + hi) / 2;
    if (x[node] == rank) {
      // The subtree holds exactly `rank` items: finish early with a blend of
      // the two child estimates, weighted by the child counts.
      const double lWeight = x[left] / (x[left] + x[left + 1]);
      const double rWeight = 1 - lWeight;
      const double rl = (hi + mid) / 2;
      return lWeight * (quant * (mid - lo) + lo) + rWeight * (quant * (hi - rl) + rl);
    }
    if (x[left] >= rank) {
      // Enough items in the left child: descend left, keep the lower half.
      node = left;
      hi = mid;
    } else if (x[left] < rank) {
      // Too few items on the left: skip them and descend right.
      rank -= x[left];
      node = left + 1;
      lo = mid;
    }
  }
  return quant * (hi - lo) + lo;
}
|
68
|
+
|
69
|
+
std::vector<int> AddToTree(int B, std::vector<double>& x) {
|
70
|
+
std::vector<int> A(1 << (B + 1));
|
71
|
+
std::vector<double>::iterator i;
|
72
|
+
for (i = x.begin(); i < x.end(); ++i) { // Iterage over items we wish to add to the tree
|
73
|
+
int index = GetIndex(B, *i);
|
74
|
+
while (index) {
|
75
|
+
++A[index];
|
76
|
+
index /= 2;
|
77
|
+
}
|
78
|
+
}
|
79
|
+
return A;
|
80
|
+
}
|
81
|
+
|
82
|
+
std::vector<int> EDM_tail(std::vector<double>& Z, int min_size = 24, double alpha = 2, double quant = 0.5) {
|
83
|
+
|
84
|
+
int N = Z.size();
|
85
|
+
int eps = (int)std::ceil(std::log(N));
|
86
|
+
eps = std::max(eps, 10);
|
87
|
+
|
88
|
+
Information info(eps, min_size);
|
89
|
+
|
90
|
+
int tau1 = info.min_size;
|
91
|
+
int tau2 = tau1 * 2;
|
92
|
+
|
93
|
+
// Populate trees and calculate statistic value for starting configuration of
|
94
|
+
// 2 min_size segments
|
95
|
+
for (int i = 0; i < tau1; ++i) {
|
96
|
+
for (int j = i + 1; j < tau1; ++j) {
|
97
|
+
int index = GetIndex(info.b, Z[i] - Z[j]);
|
98
|
+
while (index) {
|
99
|
+
++info.A[index];
|
100
|
+
index /= 2;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
// Populate trees and calculate statistic value for starting configuration of
|
106
|
+
// 2 min_size segments
|
107
|
+
for (int i = tau1; i < tau2; ++i) {
|
108
|
+
for (int j = i + 1; j < tau2; ++j) {
|
109
|
+
int index = GetIndex(info.b, Z[i] - Z[j]);
|
110
|
+
while (index) {
|
111
|
+
++info.B[index];
|
112
|
+
index /= 2;
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
// Populate trees and calculate statistic value for starting configuration of
|
118
|
+
// 2 min_size segments
|
119
|
+
for (int i = 0; i < tau1; ++i) {
|
120
|
+
for (int j = tau1; j < tau2; ++j) {
|
121
|
+
int index = GetIndex(info.b, Z[i] - Z[j]);
|
122
|
+
while (index) {
|
123
|
+
++info.AB[index];
|
124
|
+
index /= 2;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
double qa, qb, qc, stat;
|
130
|
+
|
131
|
+
qa = std::pow(GetQuantile(info.A, quant), alpha);
|
132
|
+
qb = std::pow(GetQuantile(info.B, quant), alpha);
|
133
|
+
qc = std::pow(GetQuantile(info.AB, quant), alpha);
|
134
|
+
|
135
|
+
stat = 2 * qc - qa - qb;
|
136
|
+
stat *= (double)(tau1) * (tau2 - tau1) / (tau2);
|
137
|
+
|
138
|
+
info.best_stat = stat;
|
139
|
+
info.best_loc = tau1;
|
140
|
+
info.best_t2 = tau2;
|
141
|
+
|
142
|
+
// Increment tau2 and update trees and statistic
|
143
|
+
++tau2;
|
144
|
+
for (; tau2 < N + 1; ++tau2) {
|
145
|
+
int index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
|
146
|
+
while (index) { // array position 0 is not used, so we exit once we reach this location
|
147
|
+
++info.B[index];
|
148
|
+
index /= 2;
|
149
|
+
}
|
150
|
+
qb = std::pow(GetQuantile(info.B, quant), alpha);
|
151
|
+
stat = 2 * qc - qa - qb;
|
152
|
+
stat *= (double)(tau2 - tau1) * tau1 / tau2;
|
153
|
+
|
154
|
+
if (stat > info.best_stat) {
|
155
|
+
info.best_stat = stat;
|
156
|
+
info.best_loc = tau1;
|
157
|
+
info.best_t2 = tau2;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
bool forward_move = false;
|
162
|
+
// Initial consideration of other possible locations for tau1
|
163
|
+
while (tau1 < N - min_size) {
|
164
|
+
//"warm start" to update tree and statistic value for other prefix series
|
165
|
+
if (forward_move) {
|
166
|
+
ForwardUpdate(Z, info, tau1, quant, alpha);
|
167
|
+
} else {
|
168
|
+
BackwardUpdate(Z, info, tau1, quant, alpha);
|
169
|
+
}
|
170
|
+
forward_move = !forward_move;
|
171
|
+
}
|
172
|
+
|
173
|
+
std::vector<int> ret;
|
174
|
+
if (info.best_stat > 0) {
|
175
|
+
ret.push_back(info.best_loc);
|
176
|
+
}
|
177
|
+
return ret;
|
178
|
+
}
|
179
|
+
|
180
|
+
void ForwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha) {
|
181
|
+
|
182
|
+
int min_size = info.min_size;
|
183
|
+
int tau2 = tau1 + min_size;
|
184
|
+
++tau1;
|
185
|
+
int N = Z.size(), index;
|
186
|
+
// Update A tree
|
187
|
+
for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
|
188
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
189
|
+
while (index) {
|
190
|
+
++info.A[index];
|
191
|
+
index /= 2;
|
192
|
+
}
|
193
|
+
}
|
194
|
+
for (int i = tau1 - min_size; i < tau1; ++i) {
|
195
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
|
196
|
+
while (index) {
|
197
|
+
--info.A[index];
|
198
|
+
index /= 2;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
index = GetIndex(info.b, Z[tau1 - min_size - 1] - Z[tau1 - min_size]);
|
202
|
+
while (index) {
|
203
|
+
++info.A[index];
|
204
|
+
index /= 2;
|
205
|
+
}
|
206
|
+
double qa = std::pow(GetQuantile(info.A, quant), alpha);
|
207
|
+
|
208
|
+
// Update AB tree
|
209
|
+
index = GetIndex(info.b, Z[tau1 - 1] - Z[tau1 - min_size - 1]);
|
210
|
+
while (index) {
|
211
|
+
--info.AB[index];
|
212
|
+
index /= 2;
|
213
|
+
}
|
214
|
+
for (int i = tau1; i < tau2; ++i) {
|
215
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
|
216
|
+
while (index) {
|
217
|
+
--info.AB[index];
|
218
|
+
index /= 2;
|
219
|
+
}
|
220
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
221
|
+
while (index) {
|
222
|
+
++info.AB[index];
|
223
|
+
index /= 2;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
|
227
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
228
|
+
while (index) {
|
229
|
+
--info.AB[index];
|
230
|
+
index /= 2;
|
231
|
+
}
|
232
|
+
index = GetIndex(info.b, Z[i] - Z[tau2]);
|
233
|
+
while (index) {
|
234
|
+
++info.AB[index];
|
235
|
+
index /= 2;
|
236
|
+
}
|
237
|
+
}
|
238
|
+
index = GetIndex(info.b, Z[tau1 - 1] - Z[tau2]);
|
239
|
+
while (index) {
|
240
|
+
++info.AB[index];
|
241
|
+
index /= 2;
|
242
|
+
}
|
243
|
+
double qc = std::pow(GetQuantile(info.AB, quant), alpha);
|
244
|
+
|
245
|
+
// Update B tree
|
246
|
+
for (int i = tau1; i < tau2; ++i) {
|
247
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
248
|
+
while (index) {
|
249
|
+
--info.B[index];
|
250
|
+
index /= 2;
|
251
|
+
}
|
252
|
+
index = GetIndex(info.b, Z[i] - Z[tau2]);
|
253
|
+
while (index) {
|
254
|
+
++info.B[index];
|
255
|
+
index /= 2;
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
259
|
+
// Increment tau2 and update statistic value as we proceed
|
260
|
+
++tau2;
|
261
|
+
for (; tau2 < N + 1; ++tau2) {
|
262
|
+
index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
|
263
|
+
while (index) {
|
264
|
+
++info.B[index];
|
265
|
+
index /= 2;
|
266
|
+
}
|
267
|
+
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
268
|
+
|
269
|
+
double stat = 2 * qc - qa - qb;
|
270
|
+
stat *= (double)(tau2 - tau1) * tau1 / tau2;
|
271
|
+
|
272
|
+
if (stat > info.best_stat) {
|
273
|
+
info.best_stat = stat;
|
274
|
+
info.best_loc = tau1;
|
275
|
+
info.best_t2 = tau2;
|
276
|
+
}
|
277
|
+
}
|
278
|
+
}
|
279
|
+
|
280
|
+
void BackwardUpdate(std::vector<double>& Z, Information& info, int& tau1, double quant, double alpha) {
|
281
|
+
|
282
|
+
int min_size = info.min_size;
|
283
|
+
int tau2 = tau1 + min_size;
|
284
|
+
++tau1;
|
285
|
+
int N = Z.size(), index;
|
286
|
+
// Update A tree
|
287
|
+
for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
|
288
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
289
|
+
while (index) {
|
290
|
+
++info.A[index];
|
291
|
+
index /= 2;
|
292
|
+
}
|
293
|
+
}
|
294
|
+
for (int i = tau1 - min_size; i < tau1; ++i) {
|
295
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
|
296
|
+
while (index) {
|
297
|
+
--info.A[index];
|
298
|
+
index /= 2;
|
299
|
+
}
|
300
|
+
}
|
301
|
+
index = GetIndex(info.b, Z[tau1 - min_size - 1] - Z[tau1 - min_size]);
|
302
|
+
while (index) {
|
303
|
+
++info.A[index];
|
304
|
+
index /= 2;
|
305
|
+
}
|
306
|
+
double qa = std::pow(GetQuantile(info.A, quant), alpha);
|
307
|
+
|
308
|
+
// Update AB tree
|
309
|
+
index = GetIndex(info.b, Z[tau1 - 1] - Z[tau1 - min_size - 1]);
|
310
|
+
while (index) {
|
311
|
+
--info.AB[index];
|
312
|
+
index /= 2;
|
313
|
+
}
|
314
|
+
for (int i = tau1; i < tau2; ++i) {
|
315
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - min_size - 1]);
|
316
|
+
while (index) {
|
317
|
+
--info.AB[index];
|
318
|
+
index /= 2;
|
319
|
+
}
|
320
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
321
|
+
while (index) {
|
322
|
+
++info.AB[index];
|
323
|
+
index /= 2;
|
324
|
+
}
|
325
|
+
}
|
326
|
+
for (int i = tau1 - min_size; i < tau1 - 1; ++i) {
|
327
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
328
|
+
while (index) {
|
329
|
+
--info.AB[index];
|
330
|
+
index /= 2;
|
331
|
+
}
|
332
|
+
index = GetIndex(info.b, Z[i] - Z[tau2]);
|
333
|
+
while (index) {
|
334
|
+
++info.AB[index];
|
335
|
+
index /= 2;
|
336
|
+
}
|
337
|
+
}
|
338
|
+
index = GetIndex(info.b, Z[tau1 - 1] - Z[tau2]);
|
339
|
+
while (index) {
|
340
|
+
++info.AB[index];
|
341
|
+
index /= 2;
|
342
|
+
}
|
343
|
+
double qc = std::pow(GetQuantile(info.AB, quant), alpha);
|
344
|
+
|
345
|
+
// Update B tree
|
346
|
+
for (int i = tau1; i < tau1 + min_size - 1; ++i) {
|
347
|
+
index = GetIndex(info.b, Z[tau1 + min_size - 1] - Z[i]);
|
348
|
+
while (index) {
|
349
|
+
++info.B[index];
|
350
|
+
index /= 2;
|
351
|
+
}
|
352
|
+
index = GetIndex(info.b, Z[i] - Z[tau1 - 1]);
|
353
|
+
while (index) {
|
354
|
+
--info.B[index];
|
355
|
+
index /= 2;
|
356
|
+
}
|
357
|
+
}
|
358
|
+
double qb = std::pow(GetQuantile(info.B, quant), alpha);
|
359
|
+
// Move tau2 from the end of the time series to the front.
|
360
|
+
// Update the statistic value along the way
|
361
|
+
tau2 = N;
|
362
|
+
|
363
|
+
for (; tau2 >= tau1 + min_size; --tau2) {
|
364
|
+
index = GetIndex(info.b, Z[tau2 - 1] - Z[tau2 - 2]);
|
365
|
+
while (index) {
|
366
|
+
--info.B[index];
|
367
|
+
index /= 2;
|
368
|
+
}
|
369
|
+
qb = std::pow(GetQuantile(info.B, quant), alpha);
|
370
|
+
|
371
|
+
double stat = 2 * qc - qa - qb;
|
372
|
+
stat *= (double)(tau2 - tau1) * tau1 / tau2;
|
373
|
+
|
374
|
+
if (stat > info.best_stat) {
|
375
|
+
info.best_stat = stat;
|
376
|
+
info.best_loc = tau1;
|
377
|
+
info.best_t2 = tau2;
|
378
|
+
}
|
379
|
+
}
|
380
|
+
}
|
@@ -0,0 +1,90 @@
|
|
1
|
+
/*
|
2
|
+
Robust estimation of 2[mean(X)-mean(Y)]^2 time normalization factor
|
3
|
+
This is the E-Divisive E-statistic when alpha = 2
|
4
|
+
Instead of calculating mean(X) we calculate median(X), and similarly for Y
|
5
|
+
*/
|
6
|
+
|
7
|
+
#include <algorithm>
|
8
|
+
#include <cmath>
|
9
|
+
#include <queue>
|
10
|
+
#include <vector>
|
11
|
+
|
12
|
+
void AddToHeaps(std::priority_queue<double, std::vector<double>, std::greater<double>>& m, std::priority_queue<double>& M, double x);
|
13
|
+
|
14
|
+
double getMedian(const std::priority_queue<double, std::vector<double>, std::greater<double>>& m, const std::priority_queue<double>& M);
|
15
|
+
|
16
|
+
std::vector<int> EDMX(const std::vector<double>& Z, int min_size = 24, double alpha = 2) {
|
17
|
+
|
18
|
+
alpha = 2; // Not used, just here for uniform funciton signature
|
19
|
+
|
20
|
+
std::priority_queue<double> LeftMax;
|
21
|
+
std::priority_queue<double, std::vector<double>, std::greater<double>> LeftMin;
|
22
|
+
|
23
|
+
double stat = -3, stat_best = -3, t1 = 0.0, t2;
|
24
|
+
int tau1, tau2;
|
25
|
+
int N = Z.size();
|
26
|
+
for (int i = 0; i < min_size - 1; ++i)
|
27
|
+
AddToHeaps(LeftMin, LeftMax, Z[i]);
|
28
|
+
|
29
|
+
for (tau1 = min_size; tau1 < N - min_size + 1; ++tau1) { // Iterate over breakout locations
|
30
|
+
AddToHeaps(LeftMin, LeftMax, Z[tau1 - 1]);
|
31
|
+
std::priority_queue<double> RightMax;
|
32
|
+
std::priority_queue<double, std::vector<double>, std::greater<double>> RightMin;
|
33
|
+
double medL = getMedian(LeftMin, LeftMax);
|
34
|
+
|
35
|
+
// Add first set of elements to the heaps for the right segment
|
36
|
+
for (std::vector<double>::const_iterator i = Z.begin() + tau1; i != Z.begin() + tau1 + min_size - 1; ++i)
|
37
|
+
AddToHeaps(RightMin, RightMax, *i);
|
38
|
+
|
39
|
+
for (tau2 = tau1 + min_size; tau2 < N + 1; ++tau2) { // Iterate over end of prefix series locations
|
40
|
+
AddToHeaps(RightMin, RightMax, Z[tau2 - 1]);
|
41
|
+
double medR = getMedian(RightMin, RightMax);
|
42
|
+
|
43
|
+
stat = std::pow(medL - medR, 2);
|
44
|
+
stat *= ((double)tau1 * (tau2 - tau1) / tau2);
|
45
|
+
|
46
|
+
if (stat > stat_best) {
|
47
|
+
t1 = tau1;
|
48
|
+
t2 = tau2;
|
49
|
+
stat_best = stat;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
std::vector<int> ret;
|
55
|
+
if (stat_best > 0) {
|
56
|
+
ret.push_back(t1);
|
57
|
+
}
|
58
|
+
return ret;
|
59
|
+
}
|
60
|
+
|
61
|
+
// Use 2 heaps to keep track of the median (can also be adjusted for other quantiles). One heap
|
62
|
+
// for the "larger" and one heap for the "smaller" observations. Simple to update for streaming
|
63
|
+
// data ( O(log n) ) and find median ( O(1) ).
|
64
|
+
|
65
|
+
// Median of the observations split across two heaps.
//
// m is a min-heap and M a max-heap, as filled by AddToHeaps. When one heap
// holds more elements than the other its top is the median; when they are
// the same size the median is the mean of the two tops.
// At least one heap must be non-empty.
double getMedian(const std::priority_queue<double, std::vector<double>, std::greater<double>>& m, const std::priority_queue<double>& M) {

  const std::size_t upper = m.size();
  const std::size_t lower = M.size();

  if (upper == lower) // even number of observations
    return (m.top() + M.top()) / 2;

  // odd number of observations: the bigger heap owns the middle element
  return upper > lower ? m.top() : M.top();
}
|
74
|
+
|
75
|
+
// Insert x into the pair of heaps used to track a running median.
//
// m is a min-heap, M a max-heap. After the call the two sizes differ by at
// most one.
void AddToHeaps(std::priority_queue<double, std::vector<double>, std::greater<double>>& m, std::priority_queue<double>& M, double x) {

  // x joins m only when m already has elements and x is not below its top;
  // in every other case it goes to M.
  if (!m.empty() && !(x < m.top()))
    m.push(x);
  else
    M.push(x);

  // Rebalance: move one element across when a heap leads by more than one.
  const std::size_t upper = m.size();
  const std::size_t lower = M.size();
  if (upper > lower + 1) {
    M.push(m.top());
    m.pop();
  } else if (lower > upper + 1) {
    m.push(M.top());
    M.pop();
  }
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
// rice
|
2
|
+
#include <rice/rice.hpp>
|
3
|
+
#include <rice/stl.hpp>
|
4
|
+
|
5
|
+
std::vector<int> EDM_multi(const std::vector<double>& Z, int min_size = 24, double beta = 0, int degree = 0);
|
6
|
+
std::vector<int> EDM_percent(const std::vector<double>& Z, int min_size = 24, double percent = 0, int degree = 0);
|
7
|
+
std::vector<int> EDM_tail(std::vector<double>& Z, int min_size = 24, double alpha = 2, double quant = 0.5);
|
8
|
+
std::vector<int> EDMX(const std::vector<double>& Z, int min_size = 24, double alpha = 2);
|
9
|
+
|
10
|
+
extern "C"
|
11
|
+
void Init_ext() {
|
12
|
+
auto rb_mBreakout = Rice::define_module("Breakout");
|
13
|
+
|
14
|
+
rb_mBreakout
|
15
|
+
.define_singleton_function(
|
16
|
+
"_detect",
|
17
|
+
[](std::vector<double> z, int min_size, const std::string& method, double alpha, std::optional<double> beta, int degree, std::optional<double> percent, bool exact) {
|
18
|
+
auto minmax = std::minmax_element(z.begin(), z.end());
|
19
|
+
auto min = *minmax.first;
|
20
|
+
auto max = *minmax.second;
|
21
|
+
auto diff = max - min;
|
22
|
+
if (diff == 0) {
|
23
|
+
// constant series
|
24
|
+
return Rice::Array();
|
25
|
+
}
|
26
|
+
for (auto i = 0; i < z.size(); i++) {
|
27
|
+
z[i] = (z[i] - min) / diff;
|
28
|
+
}
|
29
|
+
|
30
|
+
std::vector<int> res;
|
31
|
+
if (method == "amoc") {
|
32
|
+
if (exact) {
|
33
|
+
res = EDMX(z, min_size, alpha);
|
34
|
+
} else {
|
35
|
+
res = EDM_tail(z, min_size, alpha);
|
36
|
+
}
|
37
|
+
} else {
|
38
|
+
if (percent.has_value()) {
|
39
|
+
res = EDM_percent(z, min_size, *percent, degree);
|
40
|
+
} else {
|
41
|
+
res = EDM_multi(z, min_size, beta.value_or(0.008), degree);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
auto a = Rice::Array();
|
46
|
+
for (auto v : res) {
|
47
|
+
a.push(v);
|
48
|
+
}
|
49
|
+
return a;
|
50
|
+
});
|
51
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#include <algorithm>
|
2
|
+
#include <cmath>
|
3
|
+
#include <set>
|
4
|
+
|
5
|
+
// Penalisation functions selected by `degree` in the multi-change routines.
// NOTE(review): they look like the increments k -> g(k + 1) - g(k) of a
// linear, constant and quadratic penalty g; confirm against the callers.

// Always 1; the argument is ignored.
extern double Linear(double /*x*/) {
  return 1;
}

// Always 0; the argument is ignored.
extern double Const(double /*x*/) {
  return 0;
}

// 2x + 1.
extern double Quadratic(double x) {
  return 1 + 2 * x;
}
|
8
|
+
|
9
|
+
/*
|
10
|
+
Use 2 multisets (red-black trees) to keep track of the median. One tree for the larger (m) and
|
11
|
+
one for the smaller (M) observations. Insertion and deletion in O(log(n)) and find
|
12
|
+
the median in O(1), additional memory use is O(n).
|
13
|
+
*/
|
14
|
+
|
15
|
+
// insert x into the appropriate tree
|
16
|
+
// Insert x into the pair of trees used to track a running median.
//
// m is ordered ascending, M descending, so *begin() is the smallest element
// of m and the largest element of M. After the call the two sizes differ by
// at most one.
extern void insert_element(std::multiset<double>& m, std::multiset<double, std::greater<double>>& M, double x) {

  // x joins m only when m already has elements and x is not below its
  // smallest one; in every other case it goes to M.
  if (!m.empty() && !(x < *m.begin()))
    m.insert(x);
  else
    M.insert(x);

  // Rebalance: move one boundary element across when a tree leads by more
  // than one.
  const std::size_t upper = m.size();
  const std::size_t lower = M.size();
  if (upper > lower + 1) {
    M.insert(*m.begin());
    m.erase(m.begin());
  } else if (lower > upper + 1) {
    m.insert(*M.begin());
    M.erase(M.begin());
  }
}
|
34
|
+
|
35
|
+
// given a pair of trees obtain the median
|
36
|
+
// Median of the observations split across the two trees.
//
// When one tree holds more elements than the other its first element is the
// median; when they are the same size the median is the mean of the two
// first elements. At least one tree must be non-empty.
extern double get_median(const std::multiset<double>& m, const std::multiset<double, std::greater<double>>& M) {

  const std::size_t upper = m.size();
  const std::size_t lower = M.size();

  if (upper == lower) // even number of observations
    return (*M.begin() + *m.begin()) / 2;

  // odd number of observations: the bigger tree owns the middle element
  return upper > lower ? *m.begin() : *M.begin();
}
|
45
|
+
|
46
|
+
// remove x from the tree, if multiple copies of x exist only remove 1
|
47
|
+
// since this method is never called by the user directly it is assumed
|
48
|
+
// that there is at least 1 copy of x
|
49
|
+
// Remove one copy of x from the pair of trees and rebalance them.
//
// m is ordered ascending, M descending. If several copies of x exist only
// one is removed. Callers are expected to pass a value that is present; a
// value that cannot be found leaves both trees unchanged.
extern void remove_element(std::multiset<double>& m, std::multiset<double, std::greater<double>>& M, double x) {

  // m may be empty (a lone element always lives in M), in which case
  // *m.begin() must not be evaluated and x can only be in M.
  if (m.empty() || x < *(m.begin())) {
    std::multiset<double, std::greater<double>>::iterator i = M.find(x);
    if (i != M.end()) // erasing end() is undefined behaviour
      M.erase(i);
  } else {
    std::multiset<double>::iterator i = m.find(x);
    if (i != m.end()) // erasing end() is undefined behaviour
      m.erase(i);
  }

  // Rebalance: move one boundary element across when a tree leads by more
  // than one.
  if (m.size() > M.size() + 1) {
    M.insert(*(m.begin()));
    m.erase(m.begin());
  } else if (M.size() > m.size() + 1) {
    m.insert(*(M.begin()));
    M.erase(M.begin());
  }
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <algorithm>
|
4
|
+
#include <cmath>
|
5
|
+
#include <set>
|
6
|
+
|
7
|
+
double get_median(const std::multiset<double>&, const std::multiset<double, std::greater<double>>&);
|
8
|
+
void insert_element(std::multiset<double>&, std::multiset<double, std::greater<double>>&, double);
|
9
|
+
void remove_element(std::multiset<double>&, std::multiset<double, std::greater<double>>&, double);
|
10
|
+
|
11
|
+
extern double Linear(double x);
|
12
|
+
extern double Const(double x);
|
13
|
+
extern double Quadratic(double x);
|
@@ -0,0 +1 @@
|
|
1
|
+
require "breakout"
|
data/lib/breakout.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# ext
|
2
|
+
require "breakout/ext"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "breakout/version"
|
6
|
+
|
7
|
+
module Breakout
  # Detects breakouts (change points) in a series.
  #
  # series   - Array of numbers, or Hash whose values are the observations.
  #            A Hash is ordered by key and the result is reported as keys.
  # min_size - minimum number of observations between breakouts (>= 2).
  # method   - "multi" for several breakouts, "amoc" for at most one.
  # alpha    - number between 0 and 2, forwarded to the "amoc" routines.
  # beta     - penalty for "multi"; cannot be combined with percent.
  # degree   - degree of the penalisation polynomial: 0, 1 or 2.
  # percent  - minimum percent change for "multi"; cannot be combined with beta.
  # exact    - for "amoc": use the exact routine instead of the approximate one.
  #
  # Returns an Array of breakout locations: indexes into the series for an
  # Array, keys for a Hash. Raises ArgumentError on invalid options.
  def self.detect(series, min_size: 30, method: "multi", alpha: 2, beta: nil, degree: 1, percent: nil, exact: true)
    raise ArgumentError, "min_size must be at least 2" if min_size < 2
    raise ArgumentError, "beta and percent cannot be passed together" unless beta.nil? || percent.nil?
    raise ArgumentError, "alpha must be between 0 and 2" if alpha < 0 || alpha > 2
    raise ArgumentError, "degree must be 0, 1, or 2" unless [0, 1, 2].include?(degree)
    raise ArgumentError, "Bad method" unless ["amoc", "multi"].include?(method)

    # A breakout needs a segment of at least min_size observations on each
    # side, so a shorter series cannot contain one. Returning here also keeps
    # the approximate "amoc" routine from reading past the end of the series.
    return [] if series.size < 2 * min_size

    if series.is_a?(Hash)
      sorted = series.sort_by { |k, _| k }
      z = sorted.map(&:last)
    else
      z = series
    end

    res = _detect(z, min_size, method, alpha, beta, degree, percent, exact)
    # translate positions back into the keys of the original Hash
    res.map! { |i| sorted[i][0] } if series.is_a?(Hash)
    res
  end
end
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: breakout-detection
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-09-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rice
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 4.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 4.0.2
|
27
|
+
description:
|
28
|
+
email: andrew@ankane.org
|
29
|
+
executables: []
|
30
|
+
extensions:
|
31
|
+
- ext/breakout/extconf.rb
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- CHANGELOG.md
|
35
|
+
- LICENSE.txt
|
36
|
+
- README.md
|
37
|
+
- ext/breakout/edm_multi.cpp
|
38
|
+
- ext/breakout/edm_percent.cpp
|
39
|
+
- ext/breakout/edm_tail.cpp
|
40
|
+
- ext/breakout/edmx.cpp
|
41
|
+
- ext/breakout/ext.cpp
|
42
|
+
- ext/breakout/extconf.rb
|
43
|
+
- ext/breakout/helper.cpp
|
44
|
+
- ext/breakout/helper.h
|
45
|
+
- lib/breakout-detection.rb
|
46
|
+
- lib/breakout.rb
|
47
|
+
- lib/breakout/version.rb
|
48
|
+
homepage: https://github.com/ankane/breakout
|
49
|
+
licenses:
|
50
|
+
- GPL-2.0-or-later
|
51
|
+
metadata: {}
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '2.6'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubygems_version: 3.2.22
|
68
|
+
signing_key:
|
69
|
+
specification_version: 4
|
70
|
+
summary: Breakout detection for Ruby
|
71
|
+
test_files: []
|