hyperloglog 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ require 'lib/hyperloglog'
2
+
3
+ def load_data(name)
4
+ File.readlines(File.join(File.dirname(__FILE__), 'data', "#{name}.txt"))
5
+ end
6
+
7
+ describe 'A HyperBuilder' do
8
+ before(:each) do
9
+ @builder = HyperBuilder.new(11)
10
+ 0.upto(9) {|index| @builder.offer(index.to_s)}
11
+ end
12
+
13
+ it 'should put to_s' do
14
+ @builder.to_s.should_not == nil
15
+ end
16
+
17
+ it 'should serialize' do
18
+ @builder.serialize.should_not == nil
19
+ end
20
+
21
+ it 'should generate an estimator' do
22
+ @builder.estimator.should_not == nil
23
+ end
24
+
25
+ it 'should create the proper estimator' do
26
+ @builder.to_s.should == @builder.estimator.to_s
27
+ end
28
+ end
29
+
30
+ describe 'A HyperEstimator' do
31
+ it 'should generate good estimates' do
32
+ items = load_data('small_integers')
33
+ total_items = items.uniq.length
34
+
35
+ 4.upto(20) do |m|
36
+ se = total_items * (1.04 / Math.sqrt(2**m))
37
+
38
+ builder = HyperBuilder.new(m)
39
+ items.each{|item| builder.offer(item.to_s)}
40
+
41
+ estimate = HyperEstimator.estimate(builder.estimator)
42
+
43
+ # puts "For m = #{m} we should have #{estimate} in [#{total_items - (3 * se)}, #{total_items + (3 * se)}]"
44
+ estimate.should be >= total_items - (3 * se)
45
+ estimate.should be <= total_items + (3 * se)
46
+ end
47
+ end
48
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format progress
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hyperloglog
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Josh Ferguson
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-24 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: An efficient implementation of the HyperLogLog cardinality estimator
22
+ email: josh@besquared.net
23
+ executables: []
24
+
25
+ extensions:
26
+ - ext/extconf.rb
27
+ extra_rdoc_files:
28
+ - CHANGELOG
29
+ - README.md
30
+ - ext/boolarray.h
31
+ - ext/ewah.h
32
+ - ext/extconf.rb
33
+ - ext/hyperloglog.cpp
34
+ - ext/murmur3.h
35
+ files:
36
+ - CHANGELOG
37
+ - Manifest
38
+ - README.md
39
+ - Rakefile
40
+ - ext/boolarray.h
41
+ - ext/ewah.h
42
+ - ext/extconf.rb
43
+ - ext/hyperloglog.cpp
44
+ - ext/murmur3.h
45
+ - spec/data/integers.txt
46
+ - spec/data/small_integers.txt
47
+ - spec/data/small_integers2.txt
48
+ - spec/hyperloglog_spec.rb
49
+ - spec/spec.opts
50
+ - hyperloglog.gemspec
51
+ homepage: http://www.github.com/besquared/hyperloglog/
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options:
56
+ - --line-numbers
57
+ - --inline-source
58
+ - --title
59
+ - Hyperloglog
60
+ - --main
61
+ - README.md
62
+ require_paths:
63
+ - lib
64
+ - ext
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 11
80
+ segments:
81
+ - 1
82
+ - 2
83
+ version: "1.2"
84
+ requirements: []
85
+
86
+ rubyforge_project: hyperloglog
87
+ rubygems_version: 1.8.15
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: An efficient implementation of the HyperLogLog cardinality estimator
91
+ test_files: []
92
+