quantile 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/README.md +48 -3
- data/lib/quantile/estimator.rb +30 -37
- data/lib/quantile/quantile.rb +13 -1
- data/lib/quantile/version.rb +1 -1
- metadata +4 -7
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OWFlNTllNmI5OWJlNGJiMGQzZDUxZTUzMDg2ZWRhMTI0YWFhNWIwZA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZjZkNjZlNjY3NGUxMGEyMGM2ZjU2NTM0ZTFmZDkyOTU4M2RmODZkZg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZmRhYmI1Njg3NDM0ZDg5MGMzZmMyYjM0ZGUyOTU0YzY1OTFmN2YwNGNlZTZh
|
10
|
+
ZGViMjkzY2ViMzIwNGI4YTY3YTY0NTZlNjcyZjI1NjA4NGNmMWNiZmM1YmVh
|
11
|
+
NDkxOTAzYjE3NmFkMjhlZjcxYzQwMWM0YTNjNjFjMmI3MzNmMzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MWMyNDBiMDM3OTUzNTE4Yzg3MjAzOThlZmY5N2ZmZmVmODIwODJiMmU1MDcw
|
14
|
+
ODY5NjFmYjUyNmY5YTNhNWY2ZmY1NDAwZWNkMmUyZjA0ODZhOWNkYmZhZDJj
|
15
|
+
MWQ4ZmU1NDBiMDFmOTZjMWU2OWNhMWY5ZThkMWYyOTkzNGI5MmQ=
|
data/README.md
CHANGED
@@ -1,4 +1,49 @@
|
|
1
|
-
|
2
|
-
========================
|
1
|
+
# Quantile
|
3
2
|
|
4
|
-
Ruby Implementation of Graham Cormode and S. Muthukrishnan's Effective
|
3
|
+
Ruby Implementation of Graham Cormode and S. Muthukrishnan's [Effective
|
4
|
+
Computation of Biased Quantiles over Data Streams][1] in ICDE’05.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
```bash
|
9
|
+
gem install quantile
|
10
|
+
```
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'quantile'
|
16
|
+
|
17
|
+
estimator = Quantile::Estimator.new
|
18
|
+
|
19
|
+
# Record any number of Numeric values
|
20
|
+
10_000.times do
|
21
|
+
estimator.observe(rand)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Retrieve the value of a given quantile
|
25
|
+
estimator.query(0.5)
|
26
|
+
```
|
27
|
+
|
28
|
+
## Tests [![Build Status][2]][3]
|
29
|
+
|
30
|
+
```bash
|
31
|
+
# Install dependencies
|
32
|
+
gem install bundler
|
33
|
+
bundle install
|
34
|
+
|
35
|
+
# Run tests
|
36
|
+
rake test
|
37
|
+
```
|
38
|
+
|
39
|
+
## Resources
|
40
|
+
|
41
|
+
* [Paper: Effective Computation of Biased Quantiles over Data Streams][1]
|
42
|
+
|
43
|
+
## Author
|
44
|
+
|
45
|
+
Matt T. Proud <[matt.proud@gmail.com](mailto:matt.proud@gmail.com)>
|
46
|
+
|
47
|
+
[1]: http://www.cs.rutgers.edu/~muthu/bquant.pdf
|
48
|
+
[2]: https://secure.travis-ci.org/matttproud/ruby_quantile_estimation.png?branch=master
|
49
|
+
[3]: http://travis-ci.org/matttproud/ruby_quantile_estimation
|
data/lib/quantile/estimator.rb
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
3
|
# you may not use this file except in compliance with the License.
|
4
4
|
# You may obtain a copy of the License at
|
5
|
-
#
|
5
|
+
#
|
6
6
|
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
-
#
|
7
|
+
#
|
8
8
|
# Unless required by applicable law or agreed to in writing, software
|
9
9
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
10
10
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
@@ -41,7 +41,19 @@ module Quantile
|
|
41
41
|
@head = nil
|
42
42
|
|
43
43
|
@observations = 0
|
44
|
-
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Get the quantile targets.
|
48
|
+
#
|
49
|
+
attr_accessor :invariants
|
50
|
+
|
51
|
+
#
|
52
|
+
# Get the number of observed values.
|
53
|
+
#
|
54
|
+
def observations
|
55
|
+
flush
|
56
|
+
@observations
|
45
57
|
end
|
46
58
|
|
47
59
|
#
|
@@ -54,6 +66,7 @@ module Quantile
|
|
54
66
|
if @buffer.size == BUFFER_SIZE
|
55
67
|
flush
|
56
68
|
end
|
69
|
+
@observations += 1
|
57
70
|
end
|
58
71
|
|
59
72
|
#
|
@@ -62,21 +75,20 @@ module Quantile
|
|
62
75
|
# @param rank [Float] The target quantile to retrieve. It *must* be one of
|
63
76
|
# the invariants provided in the constructor.
|
64
77
|
#
|
65
|
-
# @return [Numeric] The quantile value for the rank
|
78
|
+
# @return [Numeric, nil] The quantile value for the rank or nil if no
|
79
|
+
# observations are present.
|
66
80
|
#
|
67
81
|
def query(rank)
|
68
82
|
flush
|
69
83
|
|
70
84
|
current = @head
|
71
|
-
|
72
|
-
return 0
|
73
|
-
end
|
85
|
+
return unless current
|
74
86
|
|
75
87
|
mid_rank = (rank * @observations).floor
|
76
88
|
max_rank = mid_rank + (invariant(mid_rank, @observations) / 2).floor
|
77
89
|
|
78
90
|
rank = 0.0
|
79
|
-
while
|
91
|
+
while current.successor
|
80
92
|
rank += current.rank
|
81
93
|
if rank + current.successor.rank + current.successor.delta > max_rank
|
82
94
|
return current.value
|
@@ -90,7 +102,12 @@ module Quantile
|
|
90
102
|
|
91
103
|
private
|
92
104
|
|
105
|
+
BUFFER_SIZE = 512
|
106
|
+
|
107
|
+
class Sample < Struct.new(:value, :rank, :delta, :successor); end
|
108
|
+
|
93
109
|
def flush
|
110
|
+
return if @buffer.empty?
|
94
111
|
@buffer.sort!
|
95
112
|
replace_batch
|
96
113
|
@buffer.clear
|
@@ -98,9 +115,7 @@ module Quantile
|
|
98
115
|
end
|
99
116
|
|
100
117
|
def replace_batch
|
101
|
-
|
102
|
-
@head = record(@buffer.shift, 1, 0, nil)
|
103
|
-
end
|
118
|
+
@head ||= record(@buffer.shift, 1, 0, nil)
|
104
119
|
|
105
120
|
rank = 0.0
|
106
121
|
current = @head
|
@@ -110,12 +125,12 @@ module Quantile
|
|
110
125
|
@head = record(s, 1, 0, @head)
|
111
126
|
end
|
112
127
|
|
113
|
-
while
|
128
|
+
while current.successor && current.successor.value < s
|
114
129
|
rank += current.rank
|
115
130
|
current = current.successor
|
116
131
|
end
|
117
132
|
|
118
|
-
|
133
|
+
unless current.successor
|
119
134
|
current.successor = record(s, 1, 0, nil)
|
120
135
|
end
|
121
136
|
|
@@ -124,10 +139,7 @@ module Quantile
|
|
124
139
|
end
|
125
140
|
|
126
141
|
def record(value, rank, delta, successor)
|
127
|
-
|
128
|
-
@items += 1
|
129
|
-
|
130
|
-
return Sample.new(value,rank,delta, successor)
|
142
|
+
return Sample.new(value, rank, delta, successor)
|
131
143
|
end
|
132
144
|
|
133
145
|
def invariant(rank, n)
|
@@ -147,7 +159,7 @@ module Quantile
|
|
147
159
|
rank = 0.0
|
148
160
|
current = @head
|
149
161
|
|
150
|
-
while
|
162
|
+
while current && current.successor
|
151
163
|
if current.rank + current.successor.rank + current.successor.delta <= invariant(rank, @observations)
|
152
164
|
removed = current.successor
|
153
165
|
|
@@ -162,23 +174,4 @@ module Quantile
|
|
162
174
|
end
|
163
175
|
end
|
164
176
|
end
|
165
|
-
|
166
|
-
private
|
167
|
-
|
168
|
-
BUFFER_SIZE = 512
|
169
|
-
|
170
|
-
class Sample
|
171
|
-
attr_accessor :value
|
172
|
-
attr_accessor :rank
|
173
|
-
attr_accessor :delta
|
174
|
-
attr_accessor :successor
|
175
|
-
|
176
|
-
def initialize(value, rank, delta, successor)
|
177
|
-
@value = value
|
178
|
-
@rank = rank
|
179
|
-
@delta = delta
|
180
|
-
@successor = successor
|
181
|
-
end
|
182
|
-
end
|
183
177
|
end
|
184
|
-
|
data/lib/quantile/quantile.rb
CHANGED
@@ -18,6 +18,8 @@ module Quantile
|
|
18
18
|
# @note {Quantile} is concurrency-safe.
|
19
19
|
#
|
20
20
|
class Quantile
|
21
|
+
include Comparable
|
22
|
+
|
21
23
|
attr_reader :quantile
|
22
24
|
attr_reader :inaccuracy
|
23
25
|
|
@@ -49,6 +51,16 @@ module Quantile
|
|
49
51
|
|
50
52
|
return @coefficient_ii * rank
|
51
53
|
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# Compare the given other quantile.
|
57
|
+
#
|
58
|
+
# @return [Fixnum] -1, 0, +1 or nil depending on whether the other quantile
|
59
|
+
# is less than, equal to, or greater than self. This is the basis for the
|
60
|
+
# tests in Comparable.
|
61
|
+
#
|
62
|
+
def <=>(other)
|
63
|
+
self.quantile <=> other.quantile && self.inaccuracy <=> other.inaccuracy
|
64
|
+
end
|
52
65
|
end
|
53
66
|
end
|
54
|
-
|
data/lib/quantile/version.rb
CHANGED
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quantile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matt T. Proud
|
@@ -27,27 +26,25 @@ files:
|
|
27
26
|
homepage: http://github.com/matttproud/ruby_quantile_estimation
|
28
27
|
licenses:
|
29
28
|
- Apache 2.0
|
29
|
+
metadata: {}
|
30
30
|
post_install_message:
|
31
31
|
rdoc_options: []
|
32
32
|
require_paths:
|
33
33
|
- lib
|
34
34
|
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
-
none: false
|
36
35
|
requirements:
|
37
36
|
- - ! '>='
|
38
37
|
- !ruby/object:Gem::Version
|
39
38
|
version: '0'
|
40
39
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
40
|
requirements:
|
43
41
|
- - ! '>='
|
44
42
|
- !ruby/object:Gem::Version
|
45
43
|
version: '0'
|
46
44
|
requirements: []
|
47
45
|
rubyforge_project:
|
48
|
-
rubygems_version: 1.
|
46
|
+
rubygems_version: 2.1.2
|
49
47
|
signing_key:
|
50
|
-
specification_version:
|
48
|
+
specification_version: 4
|
51
49
|
summary: Streaming Quantile Estimation
|
52
50
|
test_files: []
|
53
|
-
has_rdoc:
|