hyperloglog-redis 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/HISTORY.md +25 -6
- data/README.md +124 -25
- data/VERSION +1 -1
- data/hyperloglog-redis.gemspec +10 -4
- data/lib/algorithm.rb +69 -0
- data/lib/counter.rb +40 -0
- data/lib/hyperloglog-redis.rb +3 -1
- data/lib/time_series_counter.rb +80 -0
- data/spec/hyper_log_log_spec.rb +199 -197
- data/spec/time_series_counter_spec.rb +216 -0
- metadata +28 -14
- data/lib/hyper_log_log.rb +0 -96
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hyperloglog-redis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-30 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: murmurhash3
|
16
|
-
requirement: &
|
16
|
+
requirement: &2173180560 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.1.3
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2173180560
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: redis
|
27
|
-
requirement: &
|
27
|
+
requirement: &2173180080 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 3.0.1
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2173180080
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: jeweler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2173179600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.8.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2173179600
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rake
|
49
|
-
requirement: &
|
49
|
+
requirement: &2173179120 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.9.2.2
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2173179120
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &2173178640 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,18 @@ dependencies:
|
|
65
65
|
version: 2.11.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2173178640
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: timecop
|
71
|
+
requirement: &2173178160 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 0.5.3
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *2173178160
|
69
80
|
description: An implementation of the HyperLogLog set cardinality estimation algorithm
|
70
81
|
in Ruby using Redis as a back-end
|
71
82
|
email: aaron.windsor@gmail.com
|
@@ -85,10 +96,13 @@ files:
|
|
85
96
|
- Rakefile
|
86
97
|
- VERSION
|
87
98
|
- hyperloglog-redis.gemspec
|
88
|
-
- lib/
|
99
|
+
- lib/algorithm.rb
|
100
|
+
- lib/counter.rb
|
89
101
|
- lib/hyperloglog-redis.rb
|
102
|
+
- lib/time_series_counter.rb
|
90
103
|
- spec/hyper_log_log_spec.rb
|
91
104
|
- spec/spec_helper.rb
|
105
|
+
- spec/time_series_counter_spec.rb
|
92
106
|
homepage: http://github.com/aaw/hyperloglog-redis
|
93
107
|
licenses:
|
94
108
|
- MIT
|
@@ -104,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
118
|
version: '0'
|
105
119
|
segments:
|
106
120
|
- 0
|
107
|
-
hash:
|
121
|
+
hash: -3898531581503867473
|
108
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
123
|
none: false
|
110
124
|
requirements:
|
data/lib/hyper_log_log.rb
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
require 'redis'
|
2
|
-
require 'murmurhash3'
|
3
|
-
|
4
|
-
class HyperLogLog
|
5
|
-
def initialize(redis, b=10)
|
6
|
-
raise "Accuracy not supported. Please choose a value of b between 4 and 16" if b < 4 || b > 16
|
7
|
-
@redis = redis
|
8
|
-
@bits_in_hash = 32 - b
|
9
|
-
@m = (2 ** b).to_i
|
10
|
-
if @m == 16
|
11
|
-
@alpha = 0.673
|
12
|
-
elsif @m == 32
|
13
|
-
@alpha = 0.697
|
14
|
-
elsif @m == 64
|
15
|
-
@alpha = 0.709
|
16
|
-
else
|
17
|
-
@alpha = 0.7213/(1 + 1.079/@m)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def add(counter_name, value)
|
22
|
-
hash = MurmurHash3::V32.murmur3_32_str_hash(value)
|
23
|
-
function_name = hash % @m
|
24
|
-
w = hash / @m
|
25
|
-
existing_value = (@redis.hget(counter_name, function_name) || 0).to_i
|
26
|
-
new_value = [existing_value, rho(w)].max
|
27
|
-
@redis.hset(counter_name, function_name, new_value) if new_value > existing_value
|
28
|
-
end
|
29
|
-
|
30
|
-
# Estimate the cardinality of a single set
|
31
|
-
def count(counter_name)
|
32
|
-
union_helper([counter_name])
|
33
|
-
end
|
34
|
-
|
35
|
-
# Estimate the cardinality of the union of several sets
|
36
|
-
def union(*counter_names)
|
37
|
-
union_helper(counter_names)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Store the union of several sets in *destination* so that it can be used as
|
41
|
-
# a HyperLogLog counter later.
|
42
|
-
def union_store(destination, *counter_names)
|
43
|
-
raw_union(counter_names).each do |key, count|
|
44
|
-
@redis.hset(destination, key, count)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
# Estimate the cardinality of the intersection of several sets. We do this by
|
49
|
-
# using the principle of inclusion and exclusion to represent the size of the
|
50
|
-
# intersection as the alternating sum of an exponential number of
|
51
|
-
# cardinalities of unions of smaller sets.
|
52
|
-
def intersection(*counter_names)
|
53
|
-
icount = (1..counter_names.length).map do |k|
|
54
|
-
counter_names.combination(k).map do |group|
|
55
|
-
((k % 2 == 0) ? -1 : 1) * union_helper(group)
|
56
|
-
end.inject(0, :+)
|
57
|
-
end.inject(0, :+)
|
58
|
-
[icount, 0].max
|
59
|
-
end
|
60
|
-
|
61
|
-
def union_helper(counter_names)
|
62
|
-
all_estimates = raw_union(counter_names).map{ |value, score| 2 ** -score }
|
63
|
-
estimate_sum = all_estimates.reduce(:+) || 0
|
64
|
-
estimate = @alpha * @m * @m * ((estimate_sum + @m - all_estimates.length) ** -1)
|
65
|
-
if estimate <= 2.5 * @m
|
66
|
-
if all_estimates.length == @m
|
67
|
-
estimate.round
|
68
|
-
else # Correction for small sets
|
69
|
-
(@m * Math.log(Float(@m)/(@m - all_estimates.length))).round
|
70
|
-
end
|
71
|
-
elsif estimate <= 2 ** 32 / 30.0
|
72
|
-
estimate.round
|
73
|
-
else # Correction for large sets
|
74
|
-
(-2**32 * Math.log(1 - estimate/(2.0**32))).round
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def raw_union(counter_names)
|
79
|
-
counter_names.map{ |counter_name| @redis.hgetall(counter_name).map{ |x,y| [x, y.to_i] } }
|
80
|
-
.reduce(:concat)
|
81
|
-
.group_by{ |key, count| key }
|
82
|
-
.map{ |key, counters| [key, counters.map{ |x| x.last }.max] }
|
83
|
-
end
|
84
|
-
|
85
|
-
# rho(i) is the position of the first 1 in the binary representation of i,
|
86
|
-
# reading from most significant to least significant bits. Some examples:
|
87
|
-
# rho(1...) = 1, rho(001...) = 3, rho(000...0) = @bits_in_hash + 1
|
88
|
-
def rho(i)
|
89
|
-
if i == 0
|
90
|
-
@bits_in_hash + 1
|
91
|
-
else
|
92
|
-
@bits_in_hash - Math.log(i, 2).floor
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|