streaming_stats 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +24 -0
- data/Gemfile.lock +108 -0
- data/LICENSE +21 -0
- data/README.md +58 -0
- data/Rakefile +14 -0
- data/lib/streaming_stats.rb +265 -0
- data/script/console +6 -0
- data/script/lint +5 -0
- data/script/lint-autofix +5 -0
- data/script/test +14 -0
- data/streaming_stats.rb +7 -0
- data/test/test_helper.rb +4 -0
- data/test/test_streaming_stats.rb +56 -0
- metadata +55 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8f8c9052c66697c167070b7855fe5e7964ebaed158a698565e8592d510b16562
|
4
|
+
data.tar.gz: 204081b5f172a2cdd03ddeabd3df30ac4d89e89d2b50a0796e81cc6bd19d778a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 74456f61026259b87b9a5f8a08c263823183e825305cfcc9ce9451571b56337a3a1a5bdab4335b974df3273c4fcf713d32d322973ed88682ce8c3a3857737000
|
7
|
+
data.tar.gz: c9f2921fed3e04a268d79fec6002f85a2393174c822ae8873d70fcfa8a1eaad66f05d252165ab4ff0a6716c1362ad42090aaef7a75f0df1c96249c4a24fd45c0
|
data/Gemfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
gem 'activesupport', '~> 5.2'
|
5
|
+
|
6
|
+
group :development, :test do
|
7
|
+
gem 'byebug'
|
8
|
+
gem 'distribution'
|
9
|
+
gem 'guard'
|
10
|
+
gem 'guard-minitest'
|
11
|
+
gem 'guard-rubocop'
|
12
|
+
gem 'minitest'
|
13
|
+
gem 'mocha'
|
14
|
+
gem 'pry'
|
15
|
+
gem 'rake'
|
16
|
+
gem 'terminal-notifier-guard'
|
17
|
+
# If you update rubocop or rubocop-rails, please also update Gemfile.linting
|
18
|
+
gem 'rubocop', '~> 0.5'
|
19
|
+
gem 'rubocop-performance'
|
20
|
+
end
|
21
|
+
|
22
|
+
group :development do
|
23
|
+
gem 'simplecov'
|
24
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (5.2.4.4)
|
5
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
6
|
+
i18n (>= 0.7, < 2)
|
7
|
+
minitest (~> 5.1)
|
8
|
+
tzinfo (~> 1.1)
|
9
|
+
ast (2.4.1)
|
10
|
+
byebug (11.1.3)
|
11
|
+
coderay (1.1.3)
|
12
|
+
concurrent-ruby (1.1.7)
|
13
|
+
distribution (0.8.0)
|
14
|
+
docile (1.3.2)
|
15
|
+
ffi (1.13.1)
|
16
|
+
formatador (0.2.5)
|
17
|
+
guard (2.16.2)
|
18
|
+
formatador (>= 0.2.4)
|
19
|
+
listen (>= 2.7, < 4.0)
|
20
|
+
lumberjack (>= 1.0.12, < 2.0)
|
21
|
+
nenv (~> 0.1)
|
22
|
+
notiffany (~> 0.0)
|
23
|
+
pry (>= 0.9.12)
|
24
|
+
shellany (~> 0.0)
|
25
|
+
thor (>= 0.18.1)
|
26
|
+
guard-compat (1.2.1)
|
27
|
+
guard-minitest (2.4.6)
|
28
|
+
guard-compat (~> 1.2)
|
29
|
+
minitest (>= 3.0)
|
30
|
+
guard-rubocop (1.4.0)
|
31
|
+
guard (~> 2.0)
|
32
|
+
rubocop (< 2.0)
|
33
|
+
i18n (1.8.5)
|
34
|
+
concurrent-ruby (~> 1.0)
|
35
|
+
listen (3.3.3)
|
36
|
+
rb-fsevent (~> 0.10, >= 0.10.3)
|
37
|
+
rb-inotify (~> 0.9, >= 0.9.10)
|
38
|
+
lumberjack (1.2.8)
|
39
|
+
method_source (1.0.0)
|
40
|
+
minitest (5.14.2)
|
41
|
+
mocha (1.11.2)
|
42
|
+
nenv (0.3.0)
|
43
|
+
notiffany (0.1.3)
|
44
|
+
nenv (~> 0.1)
|
45
|
+
shellany (~> 0.0)
|
46
|
+
parallel (1.20.1)
|
47
|
+
parser (2.7.2.0)
|
48
|
+
ast (~> 2.4.1)
|
49
|
+
pry (0.13.1)
|
50
|
+
coderay (~> 1.1)
|
51
|
+
method_source (~> 1.0)
|
52
|
+
rainbow (3.0.0)
|
53
|
+
rake (13.0.1)
|
54
|
+
rb-fsevent (0.10.4)
|
55
|
+
rb-inotify (0.10.1)
|
56
|
+
ffi (~> 1.0)
|
57
|
+
regexp_parser (2.0.0)
|
58
|
+
rexml (3.2.4)
|
59
|
+
rubocop (0.93.1)
|
60
|
+
parallel (~> 1.10)
|
61
|
+
parser (>= 2.7.1.5)
|
62
|
+
rainbow (>= 2.2.2, < 4.0)
|
63
|
+
regexp_parser (>= 1.8)
|
64
|
+
rexml
|
65
|
+
rubocop-ast (>= 0.6.0)
|
66
|
+
ruby-progressbar (~> 1.7)
|
67
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
68
|
+
rubocop-ast (1.3.0)
|
69
|
+
parser (>= 2.7.1.5)
|
70
|
+
rubocop-performance (1.9.1)
|
71
|
+
rubocop (>= 0.90.0, < 2.0)
|
72
|
+
rubocop-ast (>= 0.4.0)
|
73
|
+
ruby-progressbar (1.10.1)
|
74
|
+
shellany (0.0.1)
|
75
|
+
simplecov (0.20.0)
|
76
|
+
docile (~> 1.1)
|
77
|
+
simplecov-html (~> 0.11)
|
78
|
+
simplecov_json_formatter (~> 0.1)
|
79
|
+
simplecov-html (0.12.3)
|
80
|
+
simplecov_json_formatter (0.1.2)
|
81
|
+
terminal-notifier-guard (1.7.0)
|
82
|
+
thor (1.0.1)
|
83
|
+
thread_safe (0.3.6)
|
84
|
+
tzinfo (1.2.8)
|
85
|
+
thread_safe (~> 0.1)
|
86
|
+
unicode-display_width (1.7.0)
|
87
|
+
|
88
|
+
PLATFORMS
|
89
|
+
ruby
|
90
|
+
|
91
|
+
DEPENDENCIES
|
92
|
+
activesupport (~> 5.2)
|
93
|
+
byebug
|
94
|
+
distribution
|
95
|
+
guard
|
96
|
+
guard-minitest
|
97
|
+
guard-rubocop
|
98
|
+
minitest
|
99
|
+
mocha
|
100
|
+
pry
|
101
|
+
rake
|
102
|
+
rubocop (~> 0.5)
|
103
|
+
rubocop-performance
|
104
|
+
simplecov
|
105
|
+
terminal-notifier-guard
|
106
|
+
|
107
|
+
BUNDLED WITH
|
108
|
+
2.1.4
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Will Fitzgerald
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Streaming Stats
|
2
|
+
|
3
|
+

|
4
|
+
|
5
|
+
StreamingStats is a Ruby class that takes streaming numeric data
|
6
|
+
and returns descriptive statistics with minimal overhead.
|
7
|
+
A stream with n entries will only require about log2(n) storage.
|
8
|
+
The main update function is `insert`, and the object can
|
9
|
+
return:
|
10
|
+
|
11
|
+
- n (number of values inserted)
|
12
|
+
- sum
|
13
|
+
- mean
|
14
|
+
- stddev
|
15
|
+
- variance
|
16
|
+
- quantile (i.e. percentile)
|
17
|
+
- min
|
18
|
+
- max
|
19
|
+
|
20
|
+
Note that quantiles are approximate.
|
21
|
+
|
22
|
+
```irb
|
23
|
+
require 'streaming_stats'
|
24
|
+
> gk = StreamingStats.new(epsilon: 0.01)
|
25
|
+
> 10000.times {gk.insert rand}
|
26
|
+
=> 10000
|
27
|
+
> gk.n
|
28
|
+
=> 10000
|
29
|
+
> gk.sum
|
30
|
+
=> 4985.484627445102
|
31
|
+
> gk.mean
|
32
|
+
=> 0.4985484627445139
|
33
|
+
> gk.stddev
|
34
|
+
=> 0.288236161831176
|
35
|
+
> gk.variance
|
36
|
+
=> 0.08308008498716787
|
37
|
+
> gk.min
|
38
|
+
=> 0.0001414880872682156
|
39
|
+
> gk.max
|
40
|
+
=> 0.9999396732975679
|
41
|
+
> gk.quantile 0.1
|
42
|
+
=> 0.08869274826771956
|
43
|
+
> gk.quantile 0.5
|
44
|
+
=> 0.4944707523857559
|
45
|
+
> gk.quantile 0.9
|
46
|
+
=> 0.9004683944698589
|
47
|
+
> gk.quantile 0.999
|
48
|
+
=> 0.9999396732975679
|
49
|
+
> gk.compression_ratio
|
50
|
+
=> 0.9927
|
51
|
+
```
|
52
|
+
|
53
|
+
The basic stats (n, sum, mean, variance, stddev) are from
|
54
|
+
my very first Gist: https://gist.github.com/willf/187846.
|
55
|
+
|
56
|
+
The approximate quantile method is a port of [streaming-percentiles-js](https://github.com/sengelha/streaming-percentiles-js).
|
57
|
+
|
58
|
+
How to calculate streaming percentiles is discussed in Steven Engelhardt's series, [Calculating Percentiles on Streaming Data](https://www.stevenengelhardt.com/series/calculating-percentiles-on-streaming-data/).
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
# `rake test`: run every test/**/test_*.rb file with test/ added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/test_*.rb'
end

# `rake` (default): shell out to the lint script first, then the test script.
desc 'Run tests and linter'
task :default do
  %w[script/lint script/test].each { |script| sh script }
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
# Public: StreamingStats class
|
6
|
+
# StreamingStats is a Ruby class that takes streaming numeric data
|
7
|
+
# and returns descriptive statistics with minimal overhead.
|
8
|
+
# A stream with n entries will only require about log2(n) storage.
|
9
|
+
# The main update function is `insert`, and the object can
|
10
|
+
# return
|
11
|
+
# - n (number of values inserted)
|
12
|
+
# - sum
|
13
|
+
# - mean
|
14
|
+
# - stddev
|
15
|
+
# - variance
|
16
|
+
# - quantile (i.e. percentile)
|
17
|
+
# - min
|
18
|
+
# - max
|
19
|
+
# The sum, mean, stddev, variance functions are calculated more or less
|
20
|
+
# as in the technical description here: https://www.johndcook.com/blog/standard_deviation/
|
21
|
+
#
|
22
|
+
# The quantile method is a Ruby port of https://github.com/sengelha/streaming-percentiles-js
|
23
|
+
# The variable names, etc. of the quantile method are adopted from that project
|
24
|
+
#
|
25
|
+
# The compression achieved can be estimated with the method compression_ratio
|
26
|
+
#
|
27
|
+
# require 'streaming_stats'
|
28
|
+
# > gk = StreamingStats.new(epsilon: 0.01); 10000.times {gk.insert rand}
|
29
|
+
# => 10000
|
30
|
+
# > gk.n
|
31
|
+
# => 10000
|
32
|
+
# > gk.sum
|
33
|
+
# => 4985.484627445102
|
34
|
+
# > gk.mean
|
35
|
+
# => 0.4985484627445139
|
36
|
+
# > gk.stddev
|
37
|
+
# => 0.288236161831176
|
38
|
+
# > gk.variance
|
39
|
+
# => 0.08308008498716787
|
40
|
+
# > gk.min
|
41
|
+
# => 0.0001414880872682156
|
42
|
+
# > gk.max
|
43
|
+
# => 0.9999396732975679
|
44
|
+
# > gk.quantile 0.1
|
45
|
+
# => 0.08869274826771956
|
46
|
+
# > gk.quantile 0.5
|
47
|
+
# => 0.4944707523857559
|
48
|
+
# > gk.quantile 0.9
|
49
|
+
# => 0.9004683944698589
|
50
|
+
# > gk.quantile 0.999
|
51
|
+
# => 0.9999396732975679
|
52
|
+
# > gk.compression_ratio
|
53
|
+
# => 0.9927
|
54
|
+
class StreamingStats
  # Band stored for delta 0 in the band lookup; intended to be larger than
  # any band number computed in _construct_band_lookup.
  GK_MAX_BAND = 999_999

  # Internal: One entry of the quantile summary.
  #
  # v     - The inserted value this entry stands for.
  # g     - The amount this entry adds to the running minimum rank
  #         (see #quantile).
  # delta - The difference between this entry's maximum and minimum rank.
  Tuple = Struct.new(:v, :g, :delta)

  attr_reader :epsilon, :n, :mean, :sum

  # epsilon - "epsilon is allowable error. As epsilon becomes smaller,
  #           the accuracy of the approximation improves, but the class
  #           consumes more memory" see https://www.stevenengelhardt.com/series/calculating-percentiles-on-streaming-data/
  def initialize(epsilon: 0.1)
    @n = 0
    @mean = 0.0
    @m2 = 0.0
    @sum = 0.0

    @epsilon = epsilon
    # NOTE(review): #insert only compresses when n is an exact multiple of
    # this value, so an epsilon for which 1 / (2 * epsilon) is not a whole
    # number appears never to trigger compression - confirm intended.
    @one_over_2e = 1 / (2 * epsilon)
    @S = []
  end

  # Public: Returns the compression list (the Array of summary tuples,
  # ordered by value).
  # For debugging only
  def s
    @S
  end

  # Public: inserts a value from a stream, updating the state
  #
  # value - The Numeric to be inserted
  #
  # Examples
  #
  #   insert(100)
  #   => 100
  #
  # Returns the Numeric inserted
  def insert(value)
    ## Basic stats accumulators (running mean and sum of squared deviations)
    @n += 1
    @sum += value
    delta = value - @mean
    @mean += (delta / @n)
    @m2 += (delta * (value - @mean))
    ## quantile work
    _compress if (@n % @one_over_2e).zero?
    _do_insert value
    value
  end

  # Public: Returns the variance of the streamed data (sum of squared
  # deviations divided by n). 0.0 until at least two values were inserted.
  #
  # Examples
  #
  #   variance
  #   => 2.00
  #
  # Returns the variance
  def variance
    return 0.0 if @n <= 1

    @m2 / @n
  end

  # Public: Returns the standard deviation of the streamed data. Initialized to 0.0
  #
  # Examples
  #
  #   stddev
  #   => 1.414
  #
  # Returns the standard deviation
  def stddev
    Math.sqrt(variance)
  end

  # Public: Returns the approximate quantile (percentile) at phi
  #
  # phi - A Numeric between 0.0 and 1.0, inclusive
  #
  # Examples
  #
  #   quantile(0.5)
  #   => 5.01
  #
  # Returns the approximate quantile, or nil if nothing has been inserted.
  # Raises ArgumentError if phi is outside 0.0..1.0.
  # Raises RuntimeError if no summary tuple lies within the error bound.
  def quantile(phi)
    raise ArgumentError, "#{phi} must be between 0.0 and 1.0 inclusive" unless phi.between?(0.0, 1.0)
    return nil if @S.empty?

    en = @epsilon * @n
    # Ranks start at 1; without the clamp phi == 0.0 asks for rank 0, which
    # no tuple can satisfy while epsilon * n is below 1.
    r = [(phi * @n).ceil, 1].max
    rmin = 0
    @S.each do |tuple|
      rmin += tuple.g
      rmax = rmin + tuple.delta
      return tuple.v if r - rmin <= en && rmax - r <= en
    end
    raise "quantile(#{phi}): no summary tuple within the allowed rank error"
  end

  # Public: Returns the minimum value so far inserted
  #
  # Examples
  #
  #   min
  #   => 1.0
  #
  # Returns the minimum value, or nil if nothing has been inserted
  def min
    @S.first&.v
  end

  # Public: Returns the maximum value so far inserted
  #
  # Examples
  #
  #   max
  #   => 500.0
  #
  # Returns the maximum value, or nil if nothing has been inserted
  def max
    @S.last&.v
  end

  # Public: Returns the compression ratio achieved, i.e. one minus the
  # number of stored tuples divided by the number of inserted values.
  #
  # Examples
  #
  #   compression_ratio
  #   => 0.9927
  #
  # Returns the compression ratio achieved (0.0 before the first insert)
  def compression_ratio
    return 0.0 if @n.zero?

    1.0 - (1.0 * @S.size / @n)
  end

  # Private: Compresses the number of values stored by merging runs of
  # tuples into their right-hand neighbour when the merged tuple stays
  # below 2 * epsilon * n.
  def _compress
    two_epsilon_n = 2 * @epsilon * @n
    bands = StreamingStats._construct_band_lookup(two_epsilon_n)
    # We must always keep the first and last nodes as these
    # are global min/max
    i = @S.length - 2
    while i >= 1
      if bands[@S[i].delta] <= bands[@S[i + 1].delta]
        # Extend the run leftwards over tuples in a strictly lower band,
        # accumulating their g; index 0 is never included.
        start_indx = i
        g_i_star = @S[i].g
        while start_indx >= 2 && (bands[@S[start_indx - 1].delta] < bands[@S[i].delta])
          start_indx -= 1
          g_i_star += @S[start_indx].g
        end
        successor = @S[i + 1]
        if (g_i_star + successor.g + successor.delta) < two_epsilon_n
          # The below is a delete_tuples([start_indx, i]) operation: the run
          # and its successor are replaced by one tuple carrying the
          # successor's value and delta and the combined g.
          merged = Tuple.new(successor.v, g_i_star + successor.g, successor.delta)
          StreamingStats.splice!(@S, start_indx, 2 + (i - start_indx), merged)
          i = start_indx
        end
      end
      i -= 1
    end
  end

  # Private: Constructs a band lookup
  #
  # two_epsilon_n - The Numeric 2 * epsilon * n (may be a Float)
  #
  # Returns an Array indexed by delta holding the band number of that delta
  def self._construct_band_lookup(two_epsilon_n)
    limit = two_epsilon_n.floor
    bands = Array.new(limit + 1)
    bands[0] = GK_MAX_BAND
    # Assigned second on purpose: when limit is 0 this overwrites index 0.
    bands[limit] = 0
    (1..Math.log2(two_epsilon_n).ceil).each do |alpha|
      two_alpha_minus_1 = 2**(alpha - 1)
      two_alpha = 2**alpha
      lower = [limit - two_alpha - (limit % two_alpha), 0].max
      upper = limit - two_alpha_minus_1 - (limit % two_alpha_minus_1)
      ((lower + 1)..upper).each { |delta| bands[delta] = alpha }
    end
    bands
  end

  # Private: Actually does a new insertion into S
  #
  # v - The Numeric to store
  #
  # Returns the compression list
  def _do_insert(v)
    i = _find_insertion_index(v)
    tuple = Tuple.new(v, 1, _determine_delta(i))
    StreamingStats.splice!(@S, i, 0, tuple)
    @S
  end

  # Private: Find where to insert
  #
  # value - The Numeric about to be inserted
  #
  # Returns the index of the first stored tuple whose value is greater than
  # value (so equal values are inserted after existing ones), or the list
  # size when there is none. The list is kept ordered by value, which is
  # what makes the binary search valid.
  def _find_insertion_index(value)
    @S.bsearch_index { |tuple| tuple.v > value } || @S.size
  end

  # Private: Determine delta
  #
  # i - The Integer index at which the new tuple will be inserted
  #
  # Returns 0 while fewer than 1 / (2 * epsilon) values have been seen or
  # when the tuple becomes the new first or last entry; otherwise
  # floor(2 * epsilon * n) - 1.
  def _determine_delta(i)
    return 0 if @n < @one_over_2e
    return 0 if i.zero? || i == @S.size

    # Clamp at 0: floating-point rounding could otherwise yield -1, which
    # would be used as a (negative) index into the band lookup.
    [(2 * @epsilon * @n).floor - 1, 0].max
  end

  # from https://stackoverflow.com/questions/6892551/array-prototype-splice-in-ruby
  # Same as Javascript splice, but not put on Array prototype
  #
  # array        - The Array to modify in place
  # start        - The Integer index at which to start
  # len          - The Integer number of elements to remove
  # replacements - Zero or more objects inserted at start
  #
  # Returns the removed elements
  def self.splice!(array, start, len, *replacements)
    removed = array.slice!(start, len)
    array[start, 0] = replacements
    removed
  end
end
|
data/script/console
ADDED
data/script/lint
ADDED
data/script/lint-autofix
ADDED
data/script/test
ADDED
data/streaming_stats.rb
ADDED
data/test/test_helper.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
# Unit tests for StreamingStats. Assertions follow Minitest's
# (expected, actual) argument order so failure messages label the two
# values correctly.
class StreamingStatsTest < Minitest::Test
  # A new instance defaults to epsilon 0.1.
  def test_default_initialization
    gk = StreamingStats.new
    assert_equal 0.1, gk.epsilon
  end

  # An explicitly given epsilon is stored unchanged.
  def test_given_init
    gk = StreamingStats.new(epsilon: 0.2)
    assert_equal 0.2, gk.epsilon
  end

  # n grows by exactly one per insert.
  def test_insert
    gk = StreamingStats.new
    300.times do |i|
      assert_equal i, gk.n
      gk.insert i
      assert_equal i + 1, gk.n
    end
  end

  # Mean, variance and stddev of uniform(0, 1) samples are close to the
  # theoretical 1/2, 1/12 and sqrt(1/12); sum agrees with mean * n.
  def test_basic_stats
    gk = StreamingStats.new(epsilon: 0.01)
    1_000.times do
      gk.insert rand
    end
    assert_in_delta 0.5, gk.mean, 0.03
    assert_in_delta 1 / 12.0, gk.variance, 0.05
    assert_in_delta Math.sqrt(1 / 12.0), gk.stddev, 0.05
    assert_equal 1000, gk.n
    assert_in_delta gk.mean * gk.n, gk.sum, 0.01
  end

  # Before any insert every accumulator reads as zero.
  def test_initialized_stats
    gk = StreamingStats.new(epsilon: 0.01)
    assert_in_delta 0.0, gk.mean, 0.001
    assert_in_delta 0.0, gk.variance, 0.001
    assert_in_delta 0.0, gk.stddev, 0.001
    assert_equal 0, gk.n
    assert_in_delta 0.0, gk.sum, 0.001
  end

  # Approximate quantiles of uniform(0, 1) samples are close to phi, and
  # the median is close to the mean.
  def test_quantiles
    gk = StreamingStats.new(epsilon: 0.01)
    10_000.times do
      gk.insert rand
    end
    assert_in_delta 0.1, gk.quantile(0.1), 0.03
    assert_in_delta 0.5, gk.quantile(0.5), 0.03
    assert_in_delta gk.mean, gk.quantile(0.5), 0.03
    assert_in_delta 0.9, gk.quantile(0.9), 0.03
  end
end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: streaming_stats
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Will Fitzgerald
|
8
|
+
autorequire:
|
9
|
+
bindir: script
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Calculates descriptive statistics from streams with minimal overhead
|
14
|
+
email: will.fitzgerald@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- Gemfile
|
20
|
+
- Gemfile.lock
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- Rakefile
|
24
|
+
- lib/streaming_stats.rb
|
25
|
+
- script/console
|
26
|
+
- script/lint
|
27
|
+
- script/lint-autofix
|
28
|
+
- script/test
|
29
|
+
- streaming_stats.rb
|
30
|
+
- test/test_helper.rb
|
31
|
+
- test/test_streaming_stats.rb
|
32
|
+
homepage: https://github.com/willf/streaming_stats
|
33
|
+
licenses:
|
34
|
+
- MIT
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '2.4'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubygems_version: 3.0.3
|
52
|
+
signing_key:
|
53
|
+
specification_version: 4
|
54
|
+
summary: Calculates descriptive statistics from streams
|
55
|
+
test_files: []
|