quantile_estimator 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +17 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +85 -0
  8. data/Rakefile +9 -0
  9. data/benchmark.rb +21 -0
  10. data/doc/Cursor.html +422 -0
  11. data/doc/Estimator.html +779 -0
  12. data/doc/Invariant.html +115 -0
  13. data/doc/Invariant/Biased.html +268 -0
  14. data/doc/Invariant/Invariant.html +193 -0
  15. data/doc/Invariant/SingleTarget.html +278 -0
  16. data/doc/Invariant/Targeted.html +278 -0
  17. data/doc/Item.html +620 -0
  18. data/doc/Quantile.html +270 -0
  19. data/doc/QuantileEstimator.html +117 -0
  20. data/doc/_index.html +211 -0
  21. data/doc/class_list.html +54 -0
  22. data/doc/compression.png +0 -0
  23. data/doc/css/common.css +1 -0
  24. data/doc/css/full_list.css +57 -0
  25. data/doc/css/style.css +338 -0
  26. data/doc/file.README.html +186 -0
  27. data/doc/file_list.html +56 -0
  28. data/doc/frames.html +26 -0
  29. data/doc/index.html +186 -0
  30. data/doc/js/app.js +219 -0
  31. data/doc/js/full_list.js +178 -0
  32. data/doc/js/jquery.js +4 -0
  33. data/doc/method_list.html +221 -0
  34. data/doc/time.png +0 -0
  35. data/doc/top-level-namespace.html +114 -0
  36. data/lib/estimator.rb +120 -0
  37. data/lib/quantile_estimator/cursor.rb +24 -0
  38. data/lib/quantile_estimator/invariant.rb +47 -0
  39. data/lib/quantile_estimator/item.rb +21 -0
  40. data/lib/quantile_estimator/quantile.rb +3 -0
  41. data/lib/quantile_estimator/test.rb +37 -0
  42. data/lib/quantile_estimator/version.rb +3 -0
  43. data/pkg/quantile_estimator-0.0.1.gem +0 -0
  44. data/quantile_estimator.gemspec +29 -0
  45. data/test/test_quantile_estimator.rb +85 -0
  46. metadata +120 -0
data/doc/time.png ADDED
Binary file
@@ -0,0 +1,114 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
+ <title>
7
+ Top Level Namespace
8
+
9
+ &mdash; Documentation by YARD 0.8.7.3
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ hasFrames = window.top.frames.main ? true : false;
19
+ relpath = '';
20
+ framesUrl = "frames.html#!" + escape(window.location.href);
21
+ </script>
22
+
23
+
24
+ <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
25
+
26
+ <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
27
+
28
+
29
+ </head>
30
+ <body>
31
+ <div id="header">
32
+ <div id="menu">
33
+
34
+ <a href="_index.html">Index</a> &raquo;
35
+
36
+
37
+ <span class="title">Top Level Namespace</span>
38
+
39
+
40
+ <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
41
+ </div>
42
+
43
+ <div id="search">
44
+
45
+ <a class="full_list_link" id="class_list_link"
46
+ href="class_list.html">
47
+ Class List
48
+ </a>
49
+
50
+ <a class="full_list_link" id="method_list_link"
51
+ href="method_list.html">
52
+ Method List
53
+ </a>
54
+
55
+ <a class="full_list_link" id="file_list_link"
56
+ href="file_list.html">
57
+ File List
58
+ </a>
59
+
60
+ </div>
61
+ <div class="clear"></div>
62
+ </div>
63
+
64
+ <iframe id="search_frame"></iframe>
65
+
66
+ <div id="content"><h1>Top Level Namespace
67
+
68
+
69
+
70
+ </h1>
71
+
72
+ <dl class="box">
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+ </dl>
82
+ <div class="clear"></div>
83
+
84
+ <h2>Defined Under Namespace</h2>
85
+ <p class="children">
86
+
87
+
88
+ <strong class="modules">Modules:</strong> <span class='object_link'><a href="Invariant.html" title="Invariant (module)">Invariant</a></span>, <span class='object_link'><a href="QuantileEstimator.html" title="QuantileEstimator (module)">QuantileEstimator</a></span>
89
+
90
+
91
+
92
+ <strong class="classes">Classes:</strong> <span class='object_link'><a href="Cursor.html" title="Cursor (class)">Cursor</a></span>, <span class='object_link'><a href="Estimator.html" title="Estimator (class)">Estimator</a></span>, <span class='object_link'><a href="Item.html" title="Item (class)">Item</a></span>, <span class='object_link'><a href="Quantile.html" title="Quantile (class)">Quantile</a></span>
93
+
94
+
95
+ </p>
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+ </div>
106
+
107
+ <div id="footer">
108
+ Generated on Fri Nov 15 15:39:43 2013 by
109
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
110
+ 0.8.7.3 (ruby-2.0.0).
111
+ </div>
112
+
113
+ </body>
114
+ </html>
data/lib/estimator.rb ADDED
@@ -0,0 +1,120 @@
1
+ require 'quantile_estimator/invariant'
2
+ require 'quantile_estimator/item'
3
+ require 'quantile_estimator/cursor'
4
+
5
# Quantile estimator that keeps its observations in a value-sorted Array of
# Item tuples (value, g, delta, rank).
#
# The invariant passed to the constructor is consulted through
# +upper_bound(rank, n)+ when picking the delta of a new item (#insert),
# when deciding whether two neighbours can be merged (#compress!) and as
# the tolerance used by #query.
class Estimator
  attr_accessor :samples
  attr_accessor :n
  attr_reader :invariant

  # Creates a new quantile Estimator object using the provided invariant.
  #
  # == Parameters:
  # invariant::
  #   An object responding to +upper_bound(rank, n)+ (see the Invariant module)
  #
  def initialize(invariant)
    @invariant = invariant
    self.samples = []
    self.n = 0
  end

  # Inserts a new element into the quantile estimator.
  # O(n), where n is the number of elements of the internal data structure
  #
  # == Parameters:
  # value::
  #   The value to observe. It is compared against stored values with +>+.
  #
  # == Returns:
  #   The number of observations after the insertion
  #
  def insert(value)
    # The new item goes right before the first strictly greater value, so
    # equal values end up after the ones already stored.
    position = samples.index { |item| item.value > value } || samples.length

    # Sum of g over every item that stays in front of the new one.
    rank_before = samples.first(position).inject(0) { |sum, item| sum + item.g }

    # A new minimum or maximum is stored with delta 0.
    delta = if position == 0 || position == samples.length
      0
    else
      [0, invariant.upper_bound(rank_before, n).floor - 1].max
    end

    samples.insert(position, Item.new(value, 1, delta, rank_before))

    # Re-number the ranks from the insertion point onwards.
    running_rank = rank_before
    samples[position..-1].each do |item|
      running_rank += item.g
      item.rank = running_rank
    end

    self.n += 1
  end

  # Compresses the internal data-structure.
  # O(n), where n is the number of elements of the internal data structure
  #
  # Walks the samples from the last item towards the first. Whenever an item
  # and its predecessor satisfy
  # <tt>predecessor.g + item.g + item.delta <= upper_bound(predecessor.rank, n)</tt>
  # the predecessor is removed and its g and rank are folded into the item.
  #
  # == Parameters:
  #
  # == Returns:
  #   The new size of the data-structure
  def compress!
    cursor = Cursor.new(samples, samples.length - 1)

    loop do
      current = ~cursor
      break if current.nil?

      predecessor = ~cursor.previous
      break if predecessor.nil?

      if (predecessor.g + current.g + current.delta).to_f <=
          invariant.upper_bound(predecessor.rank, n)
        current.rank = predecessor.rank
        current.g += predecessor.g
        cursor.previous.remove!
        # The merged item moved one slot to the left; follow it. Together
        # with the unconditional step below this means the merged item is
        # not compared with its new predecessor during this pass.
        cursor = cursor.previous
      end

      cursor = cursor.previous
    end

    samples.length
  end

  # Queries the estimator for the given rank.
  # O(n), where n is the number of elements of the internal data structure
  #
  # == Parameters:
  # phi::
  #   A number between 0 and 1 representing the rank to be queried
  #   (i.e, 0.5 represents the 50% quantile)
  #
  # == Returns:
  #   The approximate value for the quantile you are checking, or nil when
  #   nothing has been observed yet
  #
  def query(phi)
    return nil if n == 0

    target_rank = phi * n
    # NOTE: the accumulated rank starts at 0 and only adds the g of the
    # second item onwards; the first item's g is never counted.
    rank = 0

    samples.each_cons(2) do |candidate, following|
      tolerance = target_rank + (invariant.upper_bound(target_rank, n) / 2)
      return candidate.value if rank + following.g + following.delta > tolerance

      rank += following.g
    end

    samples.last.value
  end
end
@@ -0,0 +1,24 @@
1
# Positional cursor over an Array.
#
# A cursor is an (array, index) pair. #next and #previous return new cursors
# and never move the receiver; #~ reads the element under the cursor and
# #remove! deletes it from the shared array.
class Cursor
  # == Parameters:
  # array::
  #   The Array to walk. It is shared, not copied.
  # start::
  #   Index the cursor points at (defaults to 0)
  def initialize(array, start = 0)
    @array = array
    @start = start
  end

  # Dereferences the cursor.
  #
  # == Returns:
  #   The element at the cursor position, or nil when the position is
  #   negative or past the end of the array.
  def ~
    @array[@start] if @start >= 0
  end

  # Deletes the element under the cursor from the underlying array.
  #
  # A negative position is treated as "no element", exactly like #~ does.
  # Without the guard Array#delete_at would count from the end of the array
  # and delete an element the cursor does not point at.
  #
  # == Returns:
  #   The removed element, or nil when there was nothing to remove
  def remove!
    @array.delete_at(@start) if @start >= 0
  end

  # Returns a new cursor one position to the right.
  def next
    Cursor.new(@array, @start + 1)
  end

  # Returns a new cursor one position to the left.
  def previous
    Cursor.new(@array, @start - 1)
  end
end
@@ -0,0 +1,47 @@
1
# Invariant functions used by the estimator. Every invariant answers
# +upper_bound(rank, n)+ with a number.
module Invariant
  # Abstract base class; subclasses must override #upper_bound.
  class Invariant
    # == Parameters:
    # rank::
    #   Rank the bound is requested for
    # n::
    #   Number of observations
    #
    # == Raises:
    #   RuntimeError, always; this method has to be overridden
    def upper_bound(rank, n)
      raise "Implement me"
    end
  end

  # Bound that grows linearly with the rank: 2 * epsilon * rank.
  class Biased < Invariant
    def initialize(epsilon)
      @epsilon = epsilon
    end

    # +n+ is accepted for interface compatibility and is not used.
    def upper_bound(rank, n)
      2 * @epsilon * rank
    end
  end

  # Bound built around one target quantile +phi+ with error +epsilon+.
  class SingleTarget < Invariant
    def initialize(phi, epsilon)
      @phi = phi
      @epsilon = epsilon
    end

    # Ranks at or above phi * n use 2 * epsilon * rank / phi, ranks below it
    # use 2 * epsilon * (n - rank) / (1 - phi).
    def upper_bound(rank, n)
      if @phi * n <= rank
        (2 * @epsilon * rank) / @phi
      else
        (2 * @epsilon * (n - rank)) / (1 - @phi)
      end
    end
  end

  # Combines several SingleTarget invariants by taking the smallest bound.
  class Targeted < Invariant
    # == Parameters:
    # target_values::
    #   A list of [phi, epsilon] pairs, one per target quantile
    #
    # == Raises:
    #   ArgumentError when the list is empty. The minimum over no targets
    #   would be nil, which is not a usable bound.
    def initialize(target_values)
      @targets = target_values.map { |(phi, epsilon)| SingleTarget.new(phi, epsilon) }
      raise ArgumentError, "at least one [phi, epsilon] target is required" if @targets.empty?
    end

    # Returns the smallest bound reported by any of the targets.
    def upper_bound(rank, n)
      @targets.map { |target| target.upper_bound(rank, n) }.min
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # n = items
2
+ # k = asymptote
3
+ # S = data structure containing multiple tuples
4
+ # s = samples of items from the data stream
5
# One tuple of the estimator's data structure: an observed value together
# with its g, delta and (optional) rank bookkeeping fields.
class Item
  attr_accessor :value, :g, :delta, :rank

  # +rank+ may be omitted and then defaults to nil.
  def initialize(value, g, delta, rank = nil)
    self.value, self.g, self.delta, self.rank = value, g, delta, rank
  end

  # Builds a new Item that takes value, delta and rank from +item+ and
  # whose g is the sum of both g fields. Neither operand is modified.
  def merge(item)
    combined_g = g + item.g
    Item.new(item.value, combined_g, item.delta, item.rank)
  end

  # Renders the four fields the way an Array of them would be inspected.
  def to_s
    fields = [value, g, delta, rank]
    "[#{fields.map(&:inspect).join(', ')}]"
  end
end
@@ -0,0 +1,3 @@
1
# Plain holder with two read/write attributes, +quantile+ and +error+.
# Both are nil until assigned.
class Quantile
  attr_accessor :quantile
  attr_accessor :error
end
@@ -0,0 +1,37 @@
1
# Ad-hoc smoke script: feeds ten values into an estimator, dumps the stored
# samples and prints one query result.
#
# The estimator class shipped in this package is Estimator (lib/estimator.rb)
# and the biased invariant is Invariant::Biased; QuantileEstimator is only a
# module holding VERSION, so it cannot be instantiated.
require "rubygems"
require "estimator" # loads quantile_estimator/invariant, item and cursor

# quantile = Quantile.new
# quantile.quantile, quantile.error = 0.90, 0.001

invariant = Invariant::Biased.new(0.00001)
quantile_estimator = Estimator.new(invariant)

# (1..1000).each do |x|
#   # quantile_estimator.insert((1..1000).to_a[(rand * 100).floor])
#   quantile_estimator.insert(rand)
#   if x % 50 == 1
#     p "size #{quantile_estimator.samples.size}"
#     p "query(0.495) #{quantile_estimator.query(0.495)}"
#     quantile_estimator.compress!
#   end
# end

# p (1..10).to_a.shuffle
test_values = [1, 4, 6, 5, 3, 10, 8, 9, 2, 7]
test_values.each do |x|
  quantile_estimator.insert(x)
end

quantile_estimator.samples.each { |x| p x }

# p "size #{quantile_estimator.samples.size}"
p "query(0.495) #{quantile_estimator.query(0.495)}"
# p quantile_estimator.samples

# quantile_estimator.compress!

# p "size #{quantile_estimator.samples.size}"
# p "query(0.495) #{quantile_estimator.query(0.495)}"
@@ -0,0 +1,3 @@
1
module QuantileEstimator
  # Gem version, read by quantile_estimator.gemspec. Frozen so the shared
  # constant cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
Binary file
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for quantile_estimator. The version comes from
# lib/quantile_estimator/version.rb, which is why lib/ is put on the load
# path before the require below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'quantile_estimator/version'

Gem::Specification.new do |spec|
  spec.name          = "quantile_estimator"
  spec.version       = QuantileEstimator::VERSION
  spec.authors       = ["Diego Echeverri"]
  spec.email         = ["diegoeche@gmail.com"]
  spec.description   = %q{
    Implementation of quantile estimators based on.

    Cormode et. al.: "Effective Computation of Biased Quantiles over Data Streams"
  }
  spec.summary       = %q{
    This gem implements a simple quantile estimator using Ruby Arrays.
  }
  spec.homepage      = "https://github.com/diegoeche/quantile_estimator.rb"
  spec.license       = "MIT"

  # Every tracked file is packaged except previously built gems under pkg/,
  # which would otherwise be shipped inside the new gem.
  spec.files         = `git ls-files`.split($/).reject { |path| path.start_with?("pkg/") }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  # NOTE(review): "lib/quantile_estimator" on the require path makes
  # generic names such as "item", "test" and "version" requirable at top
  # level. Kept as is, since existing callers may depend on it.
  spec.require_paths = ["lib", "lib/quantile_estimator"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,85 @@
1
+ require 'test/unit'
2
+ require 'estimator'
3
+
4
+ # The current tests come from the implementation at
5
+ # https://github.com/odo/quantile_estimator
6
+
7
# Regression tests for Estimator combined with the Biased and Targeted
# invariants. Inputs are fixed permutations so results are reproducible.
class QuantileEstimatorTest < Test::Unit::TestCase

  # With a very small epsilon nothing can be merged, so compress! must leave
  # the samples untouched and queries must be exact.
  def test_biased_with_uniform_distribution
    invariant = Invariant::Biased.new(0.00001)
    estimator = Estimator.new(invariant)

    # Equivalent to: (1..10).to_a.shuffle
    test_values = [1, 4, 6, 5, 3, 10, 8, 9, 2, 7]
    test_values.each do |x|
      estimator.insert(x)
    end

    assert_equal 10, estimator.n
    assert_equal 5, estimator.query(0.45)

    # In this case it should not compress
    estimator.compress!

    assert_equal 10, estimator.n
    # n never changes on compress!; the sample count is what proves that
    # nothing was merged.
    assert_equal 10, estimator.samples.length
    assert_equal 5, estimator.query(0.45)
    assert_equal 3, estimator.query(0.21)
  end

  # A large epsilon lets compress! merge two pairs of neighbours.
  def test_compression
    invariant = Invariant::Biased.new(0.2)
    estimator = Estimator.new(invariant)

    # Equivalent to: (1..10).to_a.shuffle
    test_values = [1, 4, 6, 5, 3, 10, 8, 9, 2, 7]
    test_values.each do |x|
      estimator.insert(x)
    end

    assert_equal 10, estimator.n
    assert_equal 6, estimator.query(0.45)

    estimator.compress!

    assert_equal 8, estimator.samples.length
    assert_equal 6, estimator.query(0.45)

    assert_equal 10, estimator.query(0.8)
  end

  # Three targets (5%, 50%, 95%), each with epsilon 0.02.
  def test_targeted_invariant
    invariant = Invariant::Targeted.new([
      [0.05, 0.02],
      [0.5, 0.02],
      [0.95, 0.02]
    ])

    estimator = Estimator.new(invariant)

    # Equivalent to: (1..30).to_a.shuffle
    test_values = [10, 29, 27, 17, 20,
                   6, 21, 13, 14, 2,
                   16, 8, 3, 9, 5,
                   7, 22, 12, 4, 11,
                   26, 18, 25, 28, 19,
                   30, 1, 23, 15, 24]

    test_values.each do |x|
      estimator.insert(x)
    end

    assert_equal 30, estimator.n
    assert_equal 30, estimator.samples.size
    assert_equal 15, estimator.query(0.45)

    estimator.compress!

    assert_equal 30, estimator.n
    assert_equal 26, estimator.samples.length

    assert_equal 16, estimator.query(0.5)
    assert_equal 30, estimator.query(0.95)
    assert_equal 2, estimator.query(0.05)
  end
end