hyperloglog-redis 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/HISTORY.md +25 -6
- data/README.md +124 -25
- data/VERSION +1 -1
- data/hyperloglog-redis.gemspec +10 -4
- data/lib/algorithm.rb +69 -0
- data/lib/counter.rb +40 -0
- data/lib/hyperloglog-redis.rb +3 -1
- data/lib/time_series_counter.rb +80 -0
- data/spec/hyper_log_log_spec.rb +199 -197
- data/spec/time_series_counter_spec.rb +216 -0
- metadata +28 -14
- data/lib/hyper_log_log.rb +0 -96
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hyperloglog-redis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-30 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: murmurhash3
|
16
|
-
requirement: &
|
16
|
+
requirement: &2173180560 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.1.3
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2173180560
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: redis
|
27
|
-
requirement: &
|
27
|
+
requirement: &2173180080 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 3.0.1
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2173180080
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2173179600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.8.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2173179600
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rake
|
49
|
-
requirement: &
|
49
|
+
requirement: &2173179120 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.9.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2173179120
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &2173178640 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,18 @@ dependencies:
|
|
65
65
|
version: 2.11.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2173178640
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: timecop
|
71
|
+
requirement: &2173178160 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 0.5.3
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *2173178160
|
69
80
|
description: An implementation of the HyperLogLog set cardinality estimation algorithm
|
70
81
|
in Ruby using Redis as a back-end
|
71
82
|
email: aaron.windsor@gmail.com
|
@@ -85,10 +96,13 @@ files:
|
|
85
96
|
- Rakefile
|
86
97
|
- VERSION
|
87
98
|
- hyperloglog-redis.gemspec
|
88
|
-
- lib/
|
99
|
+
- lib/algorithm.rb
|
100
|
+
- lib/counter.rb
|
89
101
|
- lib/hyperloglog-redis.rb
|
102
|
+
- lib/time_series_counter.rb
|
90
103
|
- spec/hyper_log_log_spec.rb
|
91
104
|
- spec/spec_helper.rb
|
105
|
+
- spec/time_series_counter_spec.rb
|
92
106
|
homepage: http://github.com/aaw/hyperloglog-redis
|
93
107
|
licenses:
|
94
108
|
- MIT
|
@@ -104,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
118
|
version: '0'
|
105
119
|
segments:
|
106
120
|
- 0
|
107
|
-
hash:
|
121
|
+
hash: -3898531581503867473
|
108
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
123
|
none: false
|
110
124
|
requirements:
|
data/lib/hyper_log_log.rb
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
require 'redis'
|
2
|
-
require 'murmurhash3'
|
3
|
-
|
4
|
-
# Estimates set cardinalities with the HyperLogLog algorithm. Each counter is
# kept in a Redis hash: the field is a register index (hash % m) and the value
# is the largest rho recorded for that register by #add.
class HyperLogLog
  # redis - client object; this class calls hget, hset and hgetall on it.
  # b     - accuracy parameter; the counter uses 2 ** b registers and the
  #         remaining 32 - b bits of the hash feed rho.
  #
  # Raises RuntimeError when b is smaller than 4 or larger than 16.
  def initialize(redis, b=10)
    raise "Accuracy not supported. Please choose a value of b between 4 and 16" if b < 4 || b > 16
    @redis = redis
    @bits_in_hash = 32 - b
    @m = (2 ** b).to_i
    # Bias-correction constant: fixed values for the three smallest register
    # counts, a closed-form expression for everything larger.
    @alpha = case @m
             when 16 then 0.673
             when 32 then 0.697
             when 64 then 0.709
             else 0.7213/(1 + 1.079/@m)
             end
  end

  # Record value in the counter named counter_name. The register is only
  # written when the new rho is strictly larger than the stored one, so
  # re-adding a value never lowers a register.
  def add(counter_name, value)
    hash = MurmurHash3::V32.murmur3_32_str_hash(value)
    function_name = hash % @m
    w = hash / @m
    existing_value = (@redis.hget(counter_name, function_name) || 0).to_i
    new_value = rho(w)
    @redis.hset(counter_name, function_name, new_value) if new_value > existing_value
  end

  # Estimate the cardinality of a single set
  def count(counter_name)
    union_helper([counter_name])
  end

  # Estimate the cardinality of the union of several sets. Called with no
  # counter names, the union is empty and the estimate is 0.
  def union(*counter_names)
    union_helper(counter_names)
  end

  # Store the union of several sets in *destination* so that it can be used as
  # a HyperLogLog counter later. Fields already present in *destination* that
  # are not part of the union are left untouched.
  def union_store(destination, *counter_names)
    raw_union(counter_names).each do |key, count|
      @redis.hset(destination, key, count)
    end
  end

  # Estimate the cardinality of the intersection of several sets. We do this by
  # using the principle of inclusion and exclusion to represent the size of the
  # intersection as the alternating sum of an exponential number of
  # cardinalities of unions of smaller sets. The result is clamped at 0
  # because the alternating sum of estimates can come out negative.
  def intersection(*counter_names)
    icount = (1..counter_names.length).map do |k|
      sign = k.even? ? -1 : 1
      counter_names.combination(k).map { |group| sign * union_helper(group) }.inject(0, :+)
    end.inject(0, :+)
    [icount, 0].max
  end

  # Estimate the cardinality of the union of the counters in counter_names
  # (an Array). Registers that are absent from Redis count as zero.
  def union_helper(counter_names)
    all_estimates = raw_union(counter_names).map { |_register, score| 2 ** -score }
    estimate_sum = all_estimates.reduce(:+) || 0
    # Every missing register contributes 2 ** 0 = 1 to the sum, hence the
    # "+ @m - all_estimates.length" term.
    estimate = @alpha * @m * @m * ((estimate_sum + @m - all_estimates.length) ** -1)
    if estimate <= 2.5 * @m
      if all_estimates.length == @m
        estimate.round
      else # Correction for small sets
        (@m * Math.log(Float(@m)/(@m - all_estimates.length))).round
      end
    elsif estimate <= 2 ** 32 / 30.0
      estimate.round
    else # Correction for large sets
      (-2**32 * Math.log(1 - estimate/(2.0**32))).round
    end
  end

  # Merge the registers of all counters in counter_names (an Array) and return
  # an Array of [register, max_value] pairs, one per register that appears in
  # at least one counter. Returns [] when counter_names is empty; flat_map is
  # used instead of map + reduce(:concat) because the latter yields nil for an
  # empty list.
  def raw_union(counter_names)
    counter_names
      .flat_map { |counter_name| @redis.hgetall(counter_name).map { |x, y| [x, y.to_i] } }
      .group_by { |key, _count| key }
      .map { |key, counters| [key, counters.map(&:last).max] }
  end

  # rho(i) is the position of the first 1 in the binary representation of i,
  # reading from most significant to least significant bits. Some examples:
  # rho(1...) = 1, rho(001...) = 3, rho(000...0) = @bits_in_hash + 1
  def rho(i)
    return @bits_in_hash + 1 if i == 0

    # Math.log2 is used rather than two-argument Math.log so the result for
    # exact powers of two does not depend on a change-of-base division.
    @bits_in_hash - Math.log2(i).floor
  end

end
|