streaming_stats 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +24 -0
- data/Gemfile.lock +108 -0
- data/LICENSE +21 -0
- data/README.md +58 -0
- data/Rakefile +14 -0
- data/lib/streaming_stats.rb +265 -0
- data/script/console +6 -0
- data/script/lint +5 -0
- data/script/lint-autofix +5 -0
- data/script/test +14 -0
- data/streaming_stats.rb +7 -0
- data/test/test_helper.rb +4 -0
- data/test/test_streaming_stats.rb +56 -0
- metadata +55 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8f8c9052c66697c167070b7855fe5e7964ebaed158a698565e8592d510b16562
|
4
|
+
data.tar.gz: 204081b5f172a2cdd03ddeabd3df30ac4d89e89d2b50a0796e81cc6bd19d778a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 74456f61026259b87b9a5f8a08c263823183e825305cfcc9ce9451571b56337a3a1a5bdab4335b974df3273c4fcf713d32d322973ed88682ce8c3a3857737000
|
7
|
+
data.tar.gz: c9f2921fed3e04a268d79fec6002f85a2393174c822ae8873d70fcfa8a1eaad66f05d252165ab4ff0a6716c1362ad42090aaef7a75f0df1c96249c4a24fd45c0
|
data/Gemfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
gem 'activesupport', '~> 5.2'
|
5
|
+
|
6
|
+
group :development, :test do
|
7
|
+
gem 'byebug'
|
8
|
+
gem 'distribution'
|
9
|
+
gem 'guard'
|
10
|
+
gem 'guard-minitest'
|
11
|
+
gem 'guard-rubocop'
|
12
|
+
gem 'minitest'
|
13
|
+
gem 'mocha'
|
14
|
+
gem 'pry'
|
15
|
+
gem 'rake'
|
16
|
+
gem 'terminal-notifier-guard'
|
17
|
+
# If you update rubocop or rubocop-rails, please also update Gemfile.linting
|
18
|
+
gem 'rubocop', '~> 0.5'
|
19
|
+
gem 'rubocop-performance'
|
20
|
+
end
|
21
|
+
|
22
|
+
group :development do
|
23
|
+
gem 'simplecov'
|
24
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (5.2.4.4)
|
5
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
6
|
+
i18n (>= 0.7, < 2)
|
7
|
+
minitest (~> 5.1)
|
8
|
+
tzinfo (~> 1.1)
|
9
|
+
ast (2.4.1)
|
10
|
+
byebug (11.1.3)
|
11
|
+
coderay (1.1.3)
|
12
|
+
concurrent-ruby (1.1.7)
|
13
|
+
distribution (0.8.0)
|
14
|
+
docile (1.3.2)
|
15
|
+
ffi (1.13.1)
|
16
|
+
formatador (0.2.5)
|
17
|
+
guard (2.16.2)
|
18
|
+
formatador (>= 0.2.4)
|
19
|
+
listen (>= 2.7, < 4.0)
|
20
|
+
lumberjack (>= 1.0.12, < 2.0)
|
21
|
+
nenv (~> 0.1)
|
22
|
+
notiffany (~> 0.0)
|
23
|
+
pry (>= 0.9.12)
|
24
|
+
shellany (~> 0.0)
|
25
|
+
thor (>= 0.18.1)
|
26
|
+
guard-compat (1.2.1)
|
27
|
+
guard-minitest (2.4.6)
|
28
|
+
guard-compat (~> 1.2)
|
29
|
+
minitest (>= 3.0)
|
30
|
+
guard-rubocop (1.4.0)
|
31
|
+
guard (~> 2.0)
|
32
|
+
rubocop (< 2.0)
|
33
|
+
i18n (1.8.5)
|
34
|
+
concurrent-ruby (~> 1.0)
|
35
|
+
listen (3.3.3)
|
36
|
+
rb-fsevent (~> 0.10, >= 0.10.3)
|
37
|
+
rb-inotify (~> 0.9, >= 0.9.10)
|
38
|
+
lumberjack (1.2.8)
|
39
|
+
method_source (1.0.0)
|
40
|
+
minitest (5.14.2)
|
41
|
+
mocha (1.11.2)
|
42
|
+
nenv (0.3.0)
|
43
|
+
notiffany (0.1.3)
|
44
|
+
nenv (~> 0.1)
|
45
|
+
shellany (~> 0.0)
|
46
|
+
parallel (1.20.1)
|
47
|
+
parser (2.7.2.0)
|
48
|
+
ast (~> 2.4.1)
|
49
|
+
pry (0.13.1)
|
50
|
+
coderay (~> 1.1)
|
51
|
+
method_source (~> 1.0)
|
52
|
+
rainbow (3.0.0)
|
53
|
+
rake (13.0.1)
|
54
|
+
rb-fsevent (0.10.4)
|
55
|
+
rb-inotify (0.10.1)
|
56
|
+
ffi (~> 1.0)
|
57
|
+
regexp_parser (2.0.0)
|
58
|
+
rexml (3.2.4)
|
59
|
+
rubocop (0.93.1)
|
60
|
+
parallel (~> 1.10)
|
61
|
+
parser (>= 2.7.1.5)
|
62
|
+
rainbow (>= 2.2.2, < 4.0)
|
63
|
+
regexp_parser (>= 1.8)
|
64
|
+
rexml
|
65
|
+
rubocop-ast (>= 0.6.0)
|
66
|
+
ruby-progressbar (~> 1.7)
|
67
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
68
|
+
rubocop-ast (1.3.0)
|
69
|
+
parser (>= 2.7.1.5)
|
70
|
+
rubocop-performance (1.9.1)
|
71
|
+
rubocop (>= 0.90.0, < 2.0)
|
72
|
+
rubocop-ast (>= 0.4.0)
|
73
|
+
ruby-progressbar (1.10.1)
|
74
|
+
shellany (0.0.1)
|
75
|
+
simplecov (0.20.0)
|
76
|
+
docile (~> 1.1)
|
77
|
+
simplecov-html (~> 0.11)
|
78
|
+
simplecov_json_formatter (~> 0.1)
|
79
|
+
simplecov-html (0.12.3)
|
80
|
+
simplecov_json_formatter (0.1.2)
|
81
|
+
terminal-notifier-guard (1.7.0)
|
82
|
+
thor (1.0.1)
|
83
|
+
thread_safe (0.3.6)
|
84
|
+
tzinfo (1.2.8)
|
85
|
+
thread_safe (~> 0.1)
|
86
|
+
unicode-display_width (1.7.0)
|
87
|
+
|
88
|
+
PLATFORMS
|
89
|
+
ruby
|
90
|
+
|
91
|
+
DEPENDENCIES
|
92
|
+
activesupport (~> 5.2)
|
93
|
+
byebug
|
94
|
+
distribution
|
95
|
+
guard
|
96
|
+
guard-minitest
|
97
|
+
guard-rubocop
|
98
|
+
minitest
|
99
|
+
mocha
|
100
|
+
pry
|
101
|
+
rake
|
102
|
+
rubocop (~> 0.5)
|
103
|
+
rubocop-performance
|
104
|
+
simplecov
|
105
|
+
terminal-notifier-guard
|
106
|
+
|
107
|
+
BUNDLED WITH
|
108
|
+
2.1.4
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Will Fitzgerald
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Streaming Stats
|
2
|
+
|
3
|
+
![Ruby](https://github.com/willf/streaming-stats/workflows/Ruby/badge.svg)
|
4
|
+
|
5
|
+
StreamingStats is a Ruby class that takes streaming numeric data
|
6
|
+
and returns descriptive statistics with minimal overhead.
|
7
|
+
A stream with n entries will only require about log2(n) storage.
|
8
|
+
The main update function is `insert`, and the object can
|
9
|
+
return:
|
10
|
+
|
11
|
+
- n (number of values inserted)
|
12
|
+
- sum
|
13
|
+
- mean
|
14
|
+
- stddev
|
15
|
+
- variance
|
16
|
+
- quantile (i.e. percentile)
|
17
|
+
- min
|
18
|
+
- max
|
19
|
+
|
20
|
+
Note that quantiles are approximate.
|
21
|
+
|
22
|
+
```irb
|
23
|
+
require 'streaming_stats'
|
24
|
+
> gk = StreamingStats.new(epsilon: 0.01)
|
25
|
+
> 10000.times {gk.insert rand}
|
26
|
+
=> 10000
|
27
|
+
> gk.n
|
28
|
+
=> 10000
|
29
|
+
> gk.sum
|
30
|
+
=> 4985.484627445102
|
31
|
+
> gk.mean
|
32
|
+
=> 0.4985484627445139
|
33
|
+
> gk.stddev
|
34
|
+
=> 0.288236161831176
|
35
|
+
> gk.variance
|
36
|
+
=> 0.08308008498716787
|
37
|
+
> gk.min
|
38
|
+
=> 0.0001414880872682156
|
39
|
+
> gk.max
|
40
|
+
=> 0.9999396732975679
|
41
|
+
> gk.quantile 0.1
|
42
|
+
=> 0.08869274826771956
|
43
|
+
> gk.quantile 0.5
|
44
|
+
=> 0.4944707523857559
|
45
|
+
> gk.quantile 0.9
|
46
|
+
=> 0.9004683944698589
|
47
|
+
> gk.quantile 0.999
|
48
|
+
=> 0.9999396732975679
|
49
|
+
> gk.compression_ratio
|
50
|
+
=> 0.9927
|
51
|
+
```
|
52
|
+
|
53
|
+
The basic stats (n, sum, mean, variance, stddev) are from
|
54
|
+
my very first Gist: https://gist.github.com/willf/187846.
|
55
|
+
|
56
|
+
The approximate quantile method is a port of [streaming-percentiles-js](https://github.com/sengelha/streaming-percentiles-js).
|
57
|
+
|
58
|
+
How to calculate streaming percentiles is discussed in Steven Engelhardt's series, [Calculating Percentiles on Streaming Data](https://www.stevenengelhardt.com/series/calculating-percentiles-on-streaming-data/).
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new :test do |t|
|
6
|
+
t.libs << 'test'
|
7
|
+
t.pattern = 'test/**/test_*.rb'
|
8
|
+
end
|
9
|
+
|
10
|
+
desc 'Run tests and linter'
|
11
|
+
task :default do
|
12
|
+
sh %(script/lint)
|
13
|
+
sh %(script/test)
|
14
|
+
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
# Public: StreamingStats class
|
6
|
+
# StreamingStats is a Ruby class that takes streaming numeric data
|
7
|
+
# and returns descriptive statistics with minimal overhead.
|
8
|
+
# A stream with n entries will only require about log2(n) storage.
|
9
|
+
# The main update function is `insert`, and the object can
|
10
|
+
# return
|
11
|
+
# - n (number of values inserted)
|
12
|
+
# - sum
|
13
|
+
# - mean
|
14
|
+
# - stddev
|
15
|
+
# - variance
|
16
|
+
# - quantile (i.e. percentile)
|
17
|
+
# - min
|
18
|
+
# - max
|
19
|
+
# The sum, mean, stddev, variance functions are calculated more or less
|
20
|
+
# as in the technical description here: https://www.johndcook.com/blog/standard_deviation/
|
21
|
+
#
|
22
|
+
# The quantile method is a Ruby port of https://github.com/sengelha/streaming-percentiles-js
|
23
|
+
# The variable names, etc. of the quantile method are adopted from that project
|
24
|
+
#
|
25
|
+
# The compression achieved can be estimated with the method compression_ratio
|
26
|
+
#
|
27
|
+
# require 'streaming_stats'
|
28
|
+
# > gk = StreamingStats.new(epsilon: 0.01); 10000.times {gk.insert rand}
|
29
|
+
# => 10000
|
30
|
+
# > gk.n
|
31
|
+
# => 10000
|
32
|
+
# > gk.sum
|
33
|
+
# => 4985.484627445102
|
34
|
+
# > gk.mean
|
35
|
+
# => 0.4985484627445139
|
36
|
+
# > gk.stddev
|
37
|
+
# => 0.288236161831176
|
38
|
+
# > gk.variance
|
39
|
+
# => 0.08308008498716787
|
40
|
+
# > gk.min
|
41
|
+
# => 0.0001414880872682156
|
42
|
+
# > gk.max
|
43
|
+
# => 0.9999396732975679
|
44
|
+
# > gk.quantile 0.1
|
45
|
+
# => 0.08869274826771956
|
46
|
+
# > gk.quantile 0.5
|
47
|
+
# => 0.4944707523857559
|
48
|
+
# > gk.quantile 0.9
|
49
|
+
# => 0.9004683944698589
|
50
|
+
# > gk.quantile 0.999
|
51
|
+
# => 0.9999396732975679
|
52
|
+
# gk.compression_ratio
|
53
|
+
# => 0.9927
|
54
|
+
class StreamingStats
  # Band assigned to delta 0 in the band lookup table; larger than any band
  # the lookup loop can produce.
  GK_MAX_BAND = 999_999

  # One entry of the compressed summary list:
  #   v     - the stored value
  #   g     - amount this entry adds to the running minimum rank (rmin)
  #   delta - difference between the maximum and minimum rank of v
  # A plain Struct is used instead of OpenStruct: it offers the same
  # v/g/delta accessors but is much cheaper to allocate per inserted value.
  Tuple = Struct.new(:v, :g, :delta)

  attr_reader :epsilon, :n, :mean, :sum

  # Public: Creates an empty accumulator.
  #
  # epsilon - "epsilon is allowable error. As epsilon becomes smaller,
  #           the accuracy of the approximation improves, but the class
  #           consumes more memory" see https://www.stevenengelhardt.com/series/calculating-percentiles-on-streaming-data/
  def initialize(epsilon: 0.1)
    @n = 0
    @mean = 0.0
    @m2 = 0.0 # running sum of squared deviations from the mean
    @sum = 0.0

    @epsilon = epsilon
    @one_over_2e = 1 / (2 * epsilon) # a compression runs whenever n is a multiple of this
    @S = []
  end

  # Public: Returns the compression list
  # For debugging only
  def s
    @S
  end

  # Public: inserts a value from a stream, updating the state
  #
  # value - The Numeric to be inserted
  #
  # Examples
  #
  #   insert(100)
  #   => 100
  #
  # Returns the Numeric inserted
  def insert(value)
    ## Basic stats accumulators
    @n += 1
    @sum += value
    delta = value - @mean
    @mean += (delta / @n)
    # Uses the mean both before (delta) and after the update.
    @m2 += (delta * (value - @mean))
    ## quantile work
    _compress if (@n % @one_over_2e).zero?
    _do_insert value
    value
  end

  # Public: Returns the variance of the streamed data, computed as the
  # accumulated squared deviations divided by n.
  #
  # Examples
  #
  #   variance
  #   => 2.00
  #
  # Returns the variance as a Float; 0.0 when fewer than two values
  # have been inserted
  def variance
    return 0.0 if @n <= 1

    @m2 / @n
  end

  # Public: Returns the standard deviation of the streamed data. Initialized to 0.0
  #
  # Examples
  #
  #   stddev
  #   => 1.414
  #
  # Returns the standard deviation
  def stddev
    Math.sqrt(variance)
  end

  # Public: Returns the approximate quantile (percentile) at phi
  #
  # phi - A Numeric between 0.0 and 1.0, inclusive
  #
  # Examples
  #
  #   quantile(0.5)
  #   => 5.01
  #
  # Returns the approximate quantile, or nil if nothing has been inserted yet
  # Raises ArgumentError if phi is outside 0.0..1.0
  # Raises RuntimeError if no stored tuple satisfies the error bound
  def quantile(phi)
    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface here as an UncaughtThrowError with a mangled message.
    raise ArgumentError, "#{phi} must be between 0.0 and 1.0 inclusive" unless phi.between?(0.0, 1.0)
    return nil if @S.empty?

    en = @epsilon * @n
    r = (phi * @n).ceil
    rmin = 0
    @S.each do |tuple|
      rmin += tuple.g
      rmax = rmin + tuple.delta
      # Accept the first tuple whose rank bounds are both within epsilon * n of r.
      return tuple.v if r - rmin <= en && rmax - r <= en
    end
    raise 'Unknown error'
  end

  # Public: Returns the minimum value so far inserted
  #
  # Examples
  #
  #   min
  #   => 1.0
  #
  # Returns the minimum value, or nil if nothing has been inserted yet
  def min
    @S.first&.v
  end

  # Public: Returns the maximum value so far inserted
  #
  # Examples
  #
  #   max
  #   => 500.0
  #
  # Returns the maximum value, or nil if nothing has been inserted yet
  def max
    @S.last&.v
  end

  # Public: Returns the compression ratio achieved, i.e. the fraction of
  # inserted values that are no longer stored individually
  #
  # Examples
  #
  #   compression_ratio
  #   => 0.991
  #
  # Returns the compression ratio achieved; 0.0 if nothing has been inserted yet
  def compression_ratio
    return 0.0 if @n.zero? # avoids 0.0 / 0 => NaN

    1.0 - (1.0 * @S.size / @n)
  end

  # Private: Compresses the number of values stored
  def _compress
    two_epsilon_n = 2 * @epsilon * @n
    bands = StreamingStats._construct_band_lookup(two_epsilon_n)
    # We must always keep the first and last nodes as these
    # are global min/max
    i = @S.length - 2
    while i >= 1
      if bands[@S[i].delta] <= bands[@S[i + 1].delta]
        # Extend the merge range leftwards over neighbours in a lower band,
        # accumulating their g along the way (never touching index 0).
        start_indx = i
        g_i_star = @S[i].g
        while start_indx >= 2 && (bands[@S[start_indx - 1].delta] < bands[@S[i].delta])
          start_indx -= 1
          g_i_star += @S[start_indx].g
        end
        if (g_i_star + @S[i + 1].g + @S[i + 1].delta) < two_epsilon_n
          # The below is a delete_tuples([start_indx, i]) operation:
          # entries start_indx..i are folded into their right neighbour.
          merged = Tuple.new(@S[i + 1].v, g_i_star + @S[i + 1].g, @S[i + 1].delta)
          StreamingStats.splice!(@S, start_indx, 2 + (i - start_indx), merged)
          i = start_indx
        end
      end
      i -= 1
    end
  end

  # Private: Constructs a band lookup
  #
  # two_epsilon_n - 2 * epsilon * n (may be a Float)
  #
  # Returns an Array mapping a delta to its band number
  def self._construct_band_lookup(two_epsilon_n)
    bands = Array.new(two_epsilon_n + 1)
    bands[0] = GK_MAX_BAND
    # NOTE(review): two_epsilon_n may be a Float here; the port relies on Ruby
    # truncating Float sizes/indexes - confirm this is the intended rounding.
    bands[two_epsilon_n] = 0
    limit = two_epsilon_n.floor
    (1..Math.log2(two_epsilon_n).ceil).each do |alpha|
      two_alpha_minus_1 = 2**(alpha - 1)
      two_alpha = 2**alpha
      lower = [limit - two_alpha - (limit % two_alpha), 0].max
      upper = limit - two_alpha_minus_1 - (limit % two_alpha_minus_1)
      ((lower + 1)..upper).each { |i| bands[i] = alpha }
    end
    bands
  end

  # Private: Actually does a new insertion into S
  #
  # v - The Numeric to store
  #
  # Returns the compression list
  def _do_insert(v)
    i = _find_insertion_index(v)
    @S.insert(i, Tuple.new(v, 1, _determine_delta(i)))
    @S
  end

  # Private: Find where to insert
  #
  # Returns the index of the first stored value greater than value,
  # or the list size when there is none
  def _find_insertion_index(value)
    @S.index { |tuple| value < tuple.v } || @S.size
  end

  # Private: Determine delta
  #
  # i - The index at which the new tuple will be inserted
  #
  # Returns 0 before the first compression point and for a new first or last
  # entry; otherwise floor(2 * epsilon * n) - 1
  def _determine_delta(i)
    return 0 if @n < @one_over_2e
    return 0 if i.zero? || i == @S.size

    (2 * @epsilon * @n).floor - 1
  end

  # from https://stackoverflow.com/questions/6892551/array-prototype-splice-in-ruby
  # Same as Javascript splice, but not put on Array prototype
  #
  # array        - The Array to modify in place
  # start        - The index at which to start
  # len          - The number of elements to remove
  # replacements - Zero or more elements to insert at start
  #
  # Returns the removed elements
  def self.splice!(array, start, len, *replacements)
    removed = array.slice!(start, len)
    array[start, 0] = replacements
    removed
  end
end
|
data/script/console
ADDED
data/script/lint
ADDED
data/script/lint-autofix
ADDED
data/script/test
ADDED
data/streaming_stats.rb
ADDED
data/test/test_helper.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
# Unit tests for StreamingStats. Assertions follow Minitest's
# (expected, actual) argument order so failure messages read correctly.
class StreamingStatsTest < Minitest::Test
  # A new instance uses the default epsilon.
  def test_default_initialization
    gk = StreamingStats.new
    assert_equal 0.1, gk.epsilon
  end

  # An explicitly supplied epsilon is stored as given.
  def test_given_init
    gk = StreamingStats.new(epsilon: 0.2)
    assert_equal 0.2, gk.epsilon
  end

  # Each insert increases n by exactly one.
  def test_insert
    gk = StreamingStats.new
    300.times do |i|
      assert_equal i, gk.n
      gk.insert i
      assert_equal i + 1, gk.n
    end
  end

  # Values drawn from rand (uniform on [0, 1)) should have a mean near 0.5
  # and a variance near 1/12.
  def test_basic_stats
    gk = StreamingStats.new(epsilon: 0.01)
    1_000.times do
      gk.insert rand
    end
    assert_in_delta 0.5, gk.mean, 0.03
    assert_in_delta 1 / 12.0, gk.variance, 0.05
    assert_in_delta Math.sqrt(1 / 12.0), gk.stddev, 0.05
    assert_equal 1000, gk.n
    assert_in_delta gk.mean * gk.n, gk.sum, 0.01
  end

  # Before any insert, all accumulators report zero.
  def test_initialized_stats
    gk = StreamingStats.new(epsilon: 0.01)
    assert_in_delta 0.0, gk.mean, 0.001
    assert_in_delta 0.0, gk.variance, 0.001
    assert_in_delta 0.0, gk.stddev, 0.001
    assert_equal 0, gk.n
    assert_in_delta 0.0, gk.sum, 0.001
  end

  # Approximate quantiles of uniform data should be close to phi itself.
  def test_quantiles
    gk = StreamingStats.new(epsilon: 0.01)
    10_000.times do
      gk.insert rand
    end
    assert_in_delta 0.1, gk.quantile(0.1), 0.03
    assert_in_delta 0.5, gk.quantile(0.5), 0.03
    assert_in_delta gk.mean, gk.quantile(0.5), 0.03
    assert_in_delta 0.9, gk.quantile(0.9), 0.03
  end
end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: streaming_stats
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Will Fitzgerald
|
8
|
+
autorequire:
|
9
|
+
bindir: script
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Calculates descriptive statistics from streams with minimal overhead
|
14
|
+
email: will.fitzgerald@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- Gemfile
|
20
|
+
- Gemfile.lock
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- Rakefile
|
24
|
+
- lib/streaming_stats.rb
|
25
|
+
- script/console
|
26
|
+
- script/lint
|
27
|
+
- script/lint-autofix
|
28
|
+
- script/test
|
29
|
+
- streaming_stats.rb
|
30
|
+
- test/test_helper.rb
|
31
|
+
- test/test_streaming_stats.rb
|
32
|
+
homepage: https://github.com/willf/streaming_stats
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '2.4'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubygems_version: 3.0.3
|
52
|
+
signing_key:
|
53
|
+
specification_version: 4
|
54
|
+
summary: Calculates descriptive statistics from streams
|
55
|
+
test_files: []
|