quantile 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/README.md +48 -3
- data/lib/quantile/estimator.rb +30 -37
- data/lib/quantile/quantile.rb +13 -1
- data/lib/quantile/version.rb +1 -1
- metadata +4 -7
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OWFlNTllNmI5OWJlNGJiMGQzZDUxZTUzMDg2ZWRhMTI0YWFhNWIwZA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZjZkNjZlNjY3NGUxMGEyMGM2ZjU2NTM0ZTFmZDkyOTU4M2RmODZkZg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZmRhYmI1Njg3NDM0ZDg5MGMzZmMyYjM0ZGUyOTU0YzY1OTFmN2YwNGNlZTZh
|
10
|
+
ZGViMjkzY2ViMzIwNGI4YTY3YTY0NTZlNjcyZjI1NjA4NGNmMWNiZmM1YmVh
|
11
|
+
NDkxOTAzYjE3NmFkMjhlZjcxYzQwMWM0YTNjNjFjMmI3MzNmMzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MWMyNDBiMDM3OTUzNTE4Yzg3MjAzOThlZmY5N2ZmZmVmODIwODJiMmU1MDcw
|
14
|
+
ODY5NjFmYjUyNmY5YTNhNWY2ZmY1NDAwZWNkMmUyZjA0ODZhOWNkYmZhZDJj
|
15
|
+
MWQ4ZmU1NDBiMDFmOTZjMWU2OWNhMWY5ZThkMWYyOTkzNGI5MmQ=
|
data/README.md
CHANGED
@@ -1,4 +1,49 @@
|
|
1
|
-
|
2
|
-
========================
|
1
|
+
# Quantile
|
3
2
|
|
4
|
-
Ruby Implementation of Graham Cormode and S. Muthukrishnan's Effective
|
3
|
+
Ruby Implementation of Graham Cormode and S. Muthukrishnan's [Effective
|
4
|
+
Computation of Biased Quantiles over Data Streams][1] in ICDE’05.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
```bash
|
9
|
+
gem install quantile
|
10
|
+
```
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'quantile'
|
16
|
+
|
17
|
+
estimator = Quantile::Estimator.new
|
18
|
+
|
19
|
+
# Record any number of Numeric values
|
20
|
+
10_000.times do
|
21
|
+
estimator.observe(rand)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Retrieve the value of a given quantile
|
25
|
+
estimator.query(0.5)
|
26
|
+
```
|
27
|
+
|
28
|
+
## Tests [![Build Status][2]][3]
|
29
|
+
|
30
|
+
```bash
|
31
|
+
# Install dependencies
|
32
|
+
gem install bundler
|
33
|
+
bundle install
|
34
|
+
|
35
|
+
# Run tests
|
36
|
+
rake test
|
37
|
+
```
|
38
|
+
|
39
|
+
## Resources
|
40
|
+
|
41
|
+
* [Paper: Effective Computation of Biased Quantiles over Data Streams][1]
|
42
|
+
|
43
|
+
## Author
|
44
|
+
|
45
|
+
Matt T. Proud <[matt.proud@gmail.com](mailto:matt.proud@gmail.com)>
|
46
|
+
|
47
|
+
[1]: http://www.cs.rutgers.edu/~muthu/bquant.pdf
|
48
|
+
[2]: https://secure.travis-ci.org/matttproud/ruby_quantile_estimation.png?branch=master
|
49
|
+
[3]: http://travis-ci.org/matttproud/ruby_quantile_estimation
|
data/lib/quantile/estimator.rb
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
3
|
# you may not use this file except in compliance with the License.
|
4
4
|
# You may obtain a copy of the License at
|
5
|
-
#
|
5
|
+
#
|
6
6
|
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Unless required by applicable law or agreed to in writing, software
|
9
9
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
10
10
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
@@ -41,7 +41,19 @@ module Quantile
|
|
41
41
|
@head = nil
|
42
42
|
|
43
43
|
@observations = 0
|
44
|
-
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Get or set the quantile targets.
|
48
|
+
#
|
49
|
+
attr_accessor :invariants
|
50
|
+
|
51
|
+
#
|
52
|
+
# Get the number of observed values.
|
53
|
+
#
|
54
|
+
def observations
|
55
|
+
flush
|
56
|
+
@observations
|
45
57
|
end
|
46
58
|
|
47
59
|
#
|
@@ -54,6 +66,7 @@ module Quantile
|
|
54
66
|
if @buffer.size == BUFFER_SIZE
|
55
67
|
flush
|
56
68
|
end
|
69
|
+
@observations += 1
|
57
70
|
end
|
58
71
|
|
59
72
|
#
|
@@ -62,21 +75,20 @@ module Quantile
|
|
62
75
|
# @param rank [Float] The target quantile to retrieve. It *must* be one of
|
63
76
|
# the invariants provided in the constructor.
|
64
77
|
#
|
65
|
-
# @return [Numeric] The quantile value for the rank
|
78
|
+
# @return [Numeric, nil] The quantile value for the rank or nil if no
|
79
|
+
# observations are present.
|
66
80
|
#
|
67
81
|
def query(rank)
|
68
82
|
flush
|
69
83
|
|
70
84
|
current = @head
|
71
|
-
|
72
|
-
return 0
|
73
|
-
end
|
85
|
+
return unless current
|
74
86
|
|
75
87
|
mid_rank = (rank * @observations).floor
|
76
88
|
max_rank = mid_rank + (invariant(mid_rank, @observations) / 2).floor
|
77
89
|
|
78
90
|
rank = 0.0
|
79
|
-
while
|
91
|
+
while current.successor
|
80
92
|
rank += current.rank
|
81
93
|
if rank + current.successor.rank + current.successor.delta > max_rank
|
82
94
|
return current.value
|
@@ -90,7 +102,12 @@ module Quantile
|
|
90
102
|
|
91
103
|
private
|
92
104
|
|
105
|
+
BUFFER_SIZE = 512
|
106
|
+
|
107
|
+
Sample = Struct.new(:value, :rank, :delta, :successor) # singly linked sample node
|
108
|
+
|
93
109
|
def flush
|
110
|
+
return if @buffer.empty?
|
94
111
|
@buffer.sort!
|
95
112
|
replace_batch
|
96
113
|
@buffer.clear
|
@@ -98,9 +115,7 @@ module Quantile
|
|
98
115
|
end
|
99
116
|
|
100
117
|
def replace_batch
|
101
|
-
|
102
|
-
@head = record(@buffer.shift, 1, 0, nil)
|
103
|
-
end
|
118
|
+
@head ||= record(@buffer.shift, 1, 0, nil)
|
104
119
|
|
105
120
|
rank = 0.0
|
106
121
|
current = @head
|
@@ -110,12 +125,12 @@ module Quantile
|
|
110
125
|
@head = record(s, 1, 0, @head)
|
111
126
|
end
|
112
127
|
|
113
|
-
while
|
128
|
+
while current.successor && current.successor.value < s
|
114
129
|
rank += current.rank
|
115
130
|
current = current.successor
|
116
131
|
end
|
117
132
|
|
118
|
-
|
133
|
+
unless current.successor
|
119
134
|
current.successor = record(s, 1, 0, nil)
|
120
135
|
end
|
121
136
|
|
@@ -124,10 +139,7 @@ module Quantile
|
|
124
139
|
end
|
125
140
|
|
126
141
|
def record(value, rank, delta, successor)
|
127
|
-
|
128
|
-
@items += 1
|
129
|
-
|
130
|
-
return Sample.new(value,rank,delta, successor)
|
142
|
+
return Sample.new(value, rank, delta, successor)
|
131
143
|
end
|
132
144
|
|
133
145
|
def invariant(rank, n)
|
@@ -147,7 +159,7 @@ module Quantile
|
|
147
159
|
rank = 0.0
|
148
160
|
current = @head
|
149
161
|
|
150
|
-
while
|
162
|
+
while current && current.successor
|
151
163
|
if current.rank + current.successor.rank + current.successor.delta <= invariant(rank, @observations)
|
152
164
|
removed = current.successor
|
153
165
|
|
@@ -162,23 +174,4 @@ module Quantile
|
|
162
174
|
end
|
163
175
|
end
|
164
176
|
end
|
165
|
-
|
166
|
-
private
|
167
|
-
|
168
|
-
BUFFER_SIZE = 512
|
169
|
-
|
170
|
-
class Sample
|
171
|
-
attr_accessor :value
|
172
|
-
attr_accessor :rank
|
173
|
-
attr_accessor :delta
|
174
|
-
attr_accessor :successor
|
175
|
-
|
176
|
-
def initialize(value, rank, delta, successor)
|
177
|
-
@value = value
|
178
|
-
@rank = rank
|
179
|
-
@delta = delta
|
180
|
-
@successor = successor
|
181
|
-
end
|
182
|
-
end
|
183
177
|
end
|
184
|
-
|
data/lib/quantile/quantile.rb
CHANGED
@@ -18,6 +18,8 @@ module Quantile
|
|
18
18
|
# @note {Quantile} is concurrency-safe.
|
19
19
|
#
|
20
20
|
class Quantile
|
21
|
+
include Comparable
|
22
|
+
|
21
23
|
attr_reader :quantile
|
22
24
|
attr_reader :inaccuracy
|
23
25
|
|
@@ -49,6 +51,16 @@ module Quantile
|
|
49
51
|
|
50
52
|
return @coefficient_ii * rank
|
51
53
|
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# Compare the given other quantile.
|
57
|
+
#
|
58
|
+
# @return [Fixnum] -1, 0, +1 or nil depending on whether self is less than,
|
59
|
+
# equal to, or greater than the other quantile. This is the basis for the
|
60
|
+
# tests in Comparable.
|
61
|
+
#
|
62
|
+
def <=>(other)
|
63
|
+
[quantile, inaccuracy] <=> [other.quantile, other.inaccuracy] # order by quantile, then inaccuracy
|
64
|
+
end
|
52
65
|
end
|
53
66
|
end
|
54
|
-
|
data/lib/quantile/version.rb
CHANGED
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quantile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matt T. Proud
|
@@ -27,27 +26,25 @@ files:
|
|
27
26
|
homepage: http://github.com/matttproud/ruby_quantile_estimation
|
28
27
|
licenses:
|
29
28
|
- Apache 2.0
|
29
|
+
metadata: {}
|
30
30
|
post_install_message:
|
31
31
|
rdoc_options: []
|
32
32
|
require_paths:
|
33
33
|
- lib
|
34
34
|
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
-
none: false
|
36
35
|
requirements:
|
37
36
|
- - ! '>='
|
38
37
|
- !ruby/object:Gem::Version
|
39
38
|
version: '0'
|
40
39
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
40
|
requirements:
|
43
41
|
- - ! '>='
|
44
42
|
- !ruby/object:Gem::Version
|
45
43
|
version: '0'
|
46
44
|
requirements: []
|
47
45
|
rubyforge_project:
|
48
|
-
rubygems_version: 1.
|
46
|
+
rubygems_version: 2.1.2
|
49
47
|
signing_key:
|
50
|
-
specification_version:
|
48
|
+
specification_version: 4
|
51
49
|
summary: Streaming Quantile Estimation
|
52
50
|
test_files: []
|
53
|
-
has_rdoc:
|