hyperloglog 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,48 @@
1
+ require 'lib/hyperloglog'
2
+
3
# Reads the fixture file data/<name>.txt that sits next to this spec file.
#
# @param name [String] fixture base name, without the ".txt" extension
# @return [Array<String>] the file's lines, exactly as File.readlines yields
#   them (line terminators are kept)
def load_data(name)
  fixture_dir = File.join(File.dirname(__FILE__), 'data')
  fixture_path = File.join(fixture_dir, "#{name}.txt")
  File.readlines(fixture_path)
end
6
+
7
# Specs for HyperBuilder. Every example starts from a fresh builder created
# with HyperBuilder.new(11) that has been offered the strings "0" through "9".
# NOTE(review): the constructor argument is presumably the register-count
# exponent (the estimator spec below computes with 2**m) -- confirm against
# the extension source.
describe 'A HyperBuilder' do
  before(:each) do
    @builder = HyperBuilder.new(11)
    0.upto(9) { |index| @builder.offer(index.to_s) }
  end

  # A populated builder must produce a non-nil string form.
  it 'should put to_s' do
    @builder.to_s.should_not be_nil
  end

  # A populated builder must produce a non-nil serialized form.
  it 'should serialize' do
    @builder.serialize.should_not be_nil
  end

  # A populated builder must hand back a non-nil estimator.
  it 'should generate an estimator' do
    @builder.estimator.should_not be_nil
  end

  # The estimator derived from a builder must have the same string form as
  # the builder itself.
  it 'should create the proper estimator' do
    @builder.to_s.should == @builder.estimator.to_s
  end
end
29
+
30
# Accuracy spec for HyperEstimator: for every m from 4 through 20, the
# estimate over the 'small_integers' fixture must land within three times
# distinct_count * 1.04 / sqrt(2**m) of the true distinct-line count.
describe 'A HyperEstimator' do
  it 'should generate good estimates' do
    lines = load_data('small_integers')
    distinct_count = lines.uniq.length

    (4..20).each do |m|
      # Error term for this m, scaled by the true distinct count.
      se = distinct_count * (1.04 / Math.sqrt(2**m))
      tolerance = 3 * se
      lower_bound = distinct_count - tolerance
      upper_bound = distinct_count + tolerance

      # Feed every fixture line (duplicates included) into a fresh builder.
      builder = HyperBuilder.new(m)
      lines.each do |line|
        builder.offer(line.to_s)
      end

      estimate = HyperEstimator.estimate(builder.estimator)

      # Debug aid, left disabled:
      # puts "For m = #{m} we should have #{estimate} in [#{lower_bound}, #{upper_bound}]"
      estimate.should be >= lower_bound
      estimate.should be <= upper_bound
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format progress
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hyperloglog
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Josh Ferguson
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-24 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: An efficient implementation of the HyperLogLog cardinality estimator
22
+ email: josh@besquared.net
23
+ executables: []
24
+
25
+ extensions:
26
+ - ext/extconf.rb
27
+ extra_rdoc_files:
28
+ - CHANGELOG
29
+ - README.md
30
+ - ext/boolarray.h
31
+ - ext/ewah.h
32
+ - ext/extconf.rb
33
+ - ext/hyperloglog.cpp
34
+ - ext/murmur3.h
35
+ files:
36
+ - CHANGELOG
37
+ - Manifest
38
+ - README.md
39
+ - Rakefile
40
+ - ext/boolarray.h
41
+ - ext/ewah.h
42
+ - ext/extconf.rb
43
+ - ext/hyperloglog.cpp
44
+ - ext/murmur3.h
45
+ - spec/data/integers.txt
46
+ - spec/data/small_integers.txt
47
+ - spec/data/small_integers2.txt
48
+ - spec/hyperloglog_spec.rb
49
+ - spec/spec.opts
50
+ - hyperloglog.gemspec
51
+ homepage: http://www.github.com/besquared/hyperloglog/
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options:
56
+ - --line-numbers
57
+ - --inline-source
58
+ - --title
59
+ - Hyperloglog
60
+ - --main
61
+ - README.md
62
+ require_paths:
63
+ - lib
64
+ - ext
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 11
80
+ segments:
81
+ - 1
82
+ - 2
83
+ version: "1.2"
84
+ requirements: []
85
+
86
+ rubyforge_project: hyperloglog
87
+ rubygems_version: 1.8.15
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: An efficient implementation of the HyperLogLog cardinality estimator
91
+ test_files: []
92
+