quantile 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OWFlNTllNmI5OWJlNGJiMGQzZDUxZTUzMDg2ZWRhMTI0YWFhNWIwZA==
5
+ data.tar.gz: !binary |-
6
+ ZjZkNjZlNjY3NGUxMGEyMGM2ZjU2NTM0ZTFmZDkyOTU4M2RmODZkZg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZmRhYmI1Njg3NDM0ZDg5MGMzZmMyYjM0ZGUyOTU0YzY1OTFmN2YwNGNlZTZh
10
+ ZGViMjkzY2ViMzIwNGI4YTY3YTY0NTZlNjcyZjI1NjA4NGNmMWNiZmM1YmVh
11
+ NDkxOTAzYjE3NmFkMjhlZjcxYzQwMWM0YTNjNjFjMmI3MzNmMzM=
12
+ data.tar.gz: !binary |-
13
+ MWMyNDBiMDM3OTUzNTE4Yzg3MjAzOThlZmY5N2ZmZmVmODIwODJiMmU1MDcw
14
+ ODY5NjFmYjUyNmY5YTNhNWY2ZmY1NDAwZWNkMmUyZjA0ODZhOWNkYmZhZDJj
15
+ MWQ4ZmU1NDBiMDFmOTZjMWU2OWNhMWY5ZThkMWYyOTkzNGI5MmQ=
data/README.md CHANGED
@@ -1,4 +1,49 @@
1
- ruby_quantile_estimation
2
- ========================
1
+ # Quantile
3
2
 
4
- Ruby Implementation of Graham Cormode and S. Muthukrishnan's Effective Computation of Biased Quantiles over Data Streams in ICDE’05
3
+ Ruby Implementation of Graham Cormode and S. Muthukrishnan's [Effective
4
+ Computation of Biased Quantiles over Data Streams][1] in ICDE’05.
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ gem install quantile
10
+ ```
11
+
12
+ ## Usage
13
+
14
+ ```ruby
15
+ require 'quantile'
16
+
17
+ estimator = Quantile::Estimator.new
18
+
19
+ # Record any number of Numeric values
20
+ 10_000.times do
21
+ estimator.observe(rand)
22
+ end
23
+
24
+ # Retrieve the value of a given quantile
25
+ estimator.query(0.5)
26
+ ```
27
+
28
+ ## Tests [![Build Status][2]][3]
29
+
30
+ ```bash
31
+ # Install dependencies
32
+ gem install bundler
33
+ bundle install
34
+
35
+ # Run tests
36
+ rake test
37
+ ```
38
+
39
+ ## Resources
40
+
41
+ * [Paper: Effective Computation of Biased Quantiles over Data Streams][1]
42
+
43
+ ## Author
44
+
45
+ Matt T. Proud <[matt.proud@gmail.com](mailto:matt.proud@gmail.com)>
46
+
47
+ [1]: http://www.cs.rutgers.edu/~muthu/bquant.pdf
48
+ [2]: https://secure.travis-ci.org/matttproud/ruby_quantile_estimation.png?branch=master
49
+ [3]: http://travis-ci.org/matttproud/ruby_quantile_estimation
@@ -2,9 +2,9 @@
2
2
  # Licensed under the Apache License, Version 2.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- #
5
+ #
6
6
  # http://www.apache.org/licenses/LICENSE-2.0
7
- #
7
+ #
8
8
  # Unless required by applicable law or agreed to in writing, software
9
9
  # distributed under the License is distributed on an "AS IS" BASIS,
10
10
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -41,7 +41,19 @@ module Quantile
41
41
  @head = nil
42
42
 
43
43
  @observations = 0
44
- @items = 0
44
+ end
45
+
46
+ #
47
+ # Get or set the quantile targets.
48
+ #
49
+ attr_accessor :invariants
50
+
51
+ #
52
+ # Get the number of observed values.
53
+ #
54
+ def observations
55
+ flush
56
+ @observations
45
57
  end
46
58
 
47
59
  #
@@ -54,6 +66,7 @@ module Quantile
54
66
  if @buffer.size == BUFFER_SIZE
55
67
  flush
56
68
  end
69
+ @observations += 1
57
70
  end
58
71
 
59
72
  #
@@ -62,21 +75,20 @@ module Quantile
62
75
  # @param rank [Float] The target quantile to retrieve. It *must* be one of
63
76
  # the invariants provided in the constructor.
64
77
  #
65
- # @return [Numeric] The quantile value for the rank.
78
+ # @return [Numeric, nil] The quantile value for the rank or nil if no
79
+ # observations are present.
66
80
  #
67
81
  def query(rank)
68
82
  flush
69
83
 
70
84
  current = @head
71
- if current.nil?
72
- return 0
73
- end
85
+ return unless current
74
86
 
75
87
  mid_rank = (rank * @observations).floor
76
88
  max_rank = mid_rank + (invariant(mid_rank, @observations) / 2).floor
77
89
 
78
90
  rank = 0.0
79
- while !current.successor.nil?
91
+ while current.successor
80
92
  rank += current.rank
81
93
  if rank + current.successor.rank + current.successor.delta > max_rank
82
94
  return current.value
@@ -90,7 +102,12 @@ module Quantile
90
102
 
91
103
  private
92
104
 
105
+ BUFFER_SIZE = 512
106
+
107
+ class Sample < Struct.new(:value, :rank, :delta, :successor); end
108
+
93
109
  def flush
110
+ return if @buffer.empty?
94
111
  @buffer.sort!
95
112
  replace_batch
96
113
  @buffer.clear
@@ -98,9 +115,7 @@ module Quantile
98
115
  end
99
116
 
100
117
  def replace_batch
101
- if @head.nil?
102
- @head = record(@buffer.shift, 1, 0, nil)
103
- end
118
+ @head ||= record(@buffer.shift, 1, 0, nil)
104
119
 
105
120
  rank = 0.0
106
121
  current = @head
@@ -110,12 +125,12 @@ module Quantile
110
125
  @head = record(s, 1, 0, @head)
111
126
  end
112
127
 
113
- while !current.successor.nil? && current.successor.value < s
128
+ while current.successor && current.successor.value < s
114
129
  rank += current.rank
115
130
  current = current.successor
116
131
  end
117
132
 
118
- if current.successor.nil?
133
+ unless current.successor
119
134
  current.successor = record(s, 1, 0, nil)
120
135
  end
121
136
 
@@ -124,10 +139,7 @@ module Quantile
124
139
  end
125
140
 
126
141
  def record(value, rank, delta, successor)
127
- @observations += 1
128
- @items += 1
129
-
130
- return Sample.new(value,rank,delta, successor)
142
+ return Sample.new(value, rank, delta, successor)
131
143
  end
132
144
 
133
145
  def invariant(rank, n)
@@ -147,7 +159,7 @@ module Quantile
147
159
  rank = 0.0
148
160
  current = @head
149
161
 
150
- while !(current.nil? || current.successor.nil?)
162
+ while current && current.successor
151
163
  if current.rank + current.successor.rank + current.successor.delta <= invariant(rank, @observations)
152
164
  removed = current.successor
153
165
 
@@ -162,23 +174,4 @@ module Quantile
162
174
  end
163
175
  end
164
176
  end
165
-
166
- private
167
-
168
- BUFFER_SIZE = 512
169
-
170
- class Sample
171
- attr_accessor :value
172
- attr_accessor :rank
173
- attr_accessor :delta
174
- attr_accessor :successor
175
-
176
- def initialize(value, rank, delta, successor)
177
- @value = value
178
- @rank = rank
179
- @delta = delta
180
- @successor = successor
181
- end
182
- end
183
177
  end
184
-
@@ -18,6 +18,8 @@ module Quantile
18
18
  # @note {Quantile} is concurrency-safe.
19
19
  #
20
20
  class Quantile
21
+ include Comparable
22
+
21
23
  attr_reader :quantile
22
24
  attr_reader :inaccuracy
23
25
 
@@ -49,6 +51,16 @@ module Quantile
49
51
 
50
52
  return @coefficient_ii * rank
51
53
  end
54
+
55
+ #
56
+ # Compare the given other quantile.
57
+ #
58
+ # @return [Fixnum] -1, 0, +1 or nil depending on whether the other quantile
59
+ # is less than, equal to, or greater than self. This is the basis for the
60
+ # tests in Comparable.
61
+ #
62
+ def <=>(other)
63
+ self.quantile <=> other.quantile && self.inaccuracy <=> other.inaccuracy
64
+ end
52
65
  end
53
66
  end
54
-
@@ -12,5 +12,5 @@
12
12
  # limitations under the License.
13
13
 
14
14
  module Quantile
15
- VERSION = '0.0.2'
15
+ VERSION = '0.1.0'
16
16
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: quantile
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Matt T. Proud
@@ -27,27 +26,25 @@ files:
27
26
  homepage: http://github.com/matttproud/ruby_quantile_estimation
28
27
  licenses:
29
28
  - Apache 2.0
29
+ metadata: {}
30
30
  post_install_message:
31
31
  rdoc_options: []
32
32
  require_paths:
33
33
  - lib
34
34
  required_ruby_version: !ruby/object:Gem::Requirement
35
- none: false
36
35
  requirements:
37
36
  - - ! '>='
38
37
  - !ruby/object:Gem::Version
39
38
  version: '0'
40
39
  required_rubygems_version: !ruby/object:Gem::Requirement
41
- none: false
42
40
  requirements:
43
41
  - - ! '>='
44
42
  - !ruby/object:Gem::Version
45
43
  version: '0'
46
44
  requirements: []
47
45
  rubyforge_project:
48
- rubygems_version: 1.8.24
46
+ rubygems_version: 2.1.2
49
47
  signing_key:
50
- specification_version: 3
48
+ specification_version: 4
51
49
  summary: Streaming Quantile Estimation
52
50
  test_files: []
53
- has_rdoc: