hyperloglog-redis 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hyperloglog-redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-26 00:00:00.000000000Z
12
+ date: 2012-11-30 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: murmurhash3
16
- requirement: &2172774780 !ruby/object:Gem::Requirement
16
+ requirement: &2173180560 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.1.3
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2172774780
24
+ version_requirements: *2173180560
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &2172774300 !ruby/object:Gem::Requirement
27
+ requirement: &2173180080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 3.0.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2172774300
35
+ version_requirements: *2173180080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &2172773820 !ruby/object:Gem::Requirement
38
+ requirement: &2173179600 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.8.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2172773820
46
+ version_requirements: *2173179600
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
- requirement: &2172773340 !ruby/object:Gem::Requirement
49
+ requirement: &2173179120 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.9.2.2
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2172773340
57
+ version_requirements: *2173179120
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &2172789220 !ruby/object:Gem::Requirement
60
+ requirement: &2173178640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,18 @@ dependencies:
65
65
  version: 2.11.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2172789220
68
+ version_requirements: *2173178640
69
+ - !ruby/object:Gem::Dependency
70
+ name: timecop
71
+ requirement: &2173178160 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: 0.5.3
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *2173178160
69
80
  description: An implementation of the HyperLogLog set cardinality estimation algorithm
70
81
  in Ruby using Redis as a back-end
71
82
  email: aaron.windsor@gmail.com
@@ -85,10 +96,13 @@ files:
85
96
  - Rakefile
86
97
  - VERSION
87
98
  - hyperloglog-redis.gemspec
88
- - lib/hyper_log_log.rb
99
+ - lib/algorithm.rb
100
+ - lib/counter.rb
89
101
  - lib/hyperloglog-redis.rb
102
+ - lib/time_series_counter.rb
90
103
  - spec/hyper_log_log_spec.rb
91
104
  - spec/spec_helper.rb
105
+ - spec/time_series_counter_spec.rb
92
106
  homepage: http://github.com/aaw/hyperloglog-redis
93
107
  licenses:
94
108
  - MIT
@@ -104,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
104
118
  version: '0'
105
119
  segments:
106
120
  - 0
107
- hash: 2426569210961737114
121
+ hash: -3898531581503867473
108
122
  required_rubygems_version: !ruby/object:Gem::Requirement
109
123
  none: false
110
124
  requirements:
@@ -1,96 +0,0 @@
1
- require 'redis'
2
- require 'murmurhash3'
3
-
4
- class HyperLogLog
5
- def initialize(redis, b=10)
6
- raise "Accuracy not supported. Please choose a value of b between 4 and 16" if b < 4 || b > 16
7
- @redis = redis
8
- @bits_in_hash = 32 - b
9
- @m = (2 ** b).to_i
10
- if @m == 16
11
- @alpha = 0.673
12
- elsif @m == 32
13
- @alpha = 0.697
14
- elsif @m == 64
15
- @alpha = 0.709
16
- else
17
- @alpha = 0.7213/(1 + 1.079/@m)
18
- end
19
- end
20
-
21
- def add(counter_name, value)
22
- hash = MurmurHash3::V32.murmur3_32_str_hash(value)
23
- function_name = hash % @m
24
- w = hash / @m
25
- existing_value = (@redis.hget(counter_name, function_name) || 0).to_i
26
- new_value = [existing_value, rho(w)].max
27
- @redis.hset(counter_name, function_name, new_value) if new_value > existing_value
28
- end
29
-
30
- # Estimate the cardinality of a single set
31
- def count(counter_name)
32
- union_helper([counter_name])
33
- end
34
-
35
- # Estimate the cardinality of the union of several sets
36
- def union(*counter_names)
37
- union_helper(counter_names)
38
- end
39
-
40
- # Store the union of several sets in *destination* so that it can be used as
41
- # a HyperLogLog counter later.
42
- def union_store(destination, *counter_names)
43
- raw_union(counter_names).each do |key, count|
44
- @redis.hset(destination, key, count)
45
- end
46
- end
47
-
48
- # Estimate the cardinality of the intersection of several sets. We do this by
49
- # using the principle of inclusion and exclusion to represent the size of the
50
- # intersection as the alternating sum of an exponential number of
51
- # cardinalities of unions of smaller sets.
52
- def intersection(*counter_names)
53
- icount = (1..counter_names.length).map do |k|
54
- counter_names.combination(k).map do |group|
55
- ((k % 2 == 0) ? -1 : 1) * union_helper(group)
56
- end.inject(0, :+)
57
- end.inject(0, :+)
58
- [icount, 0].max
59
- end
60
-
61
- def union_helper(counter_names)
62
- all_estimates = raw_union(counter_names).map{ |value, score| 2 ** -score }
63
- estimate_sum = all_estimates.reduce(:+) || 0
64
- estimate = @alpha * @m * @m * ((estimate_sum + @m - all_estimates.length) ** -1)
65
- if estimate <= 2.5 * @m
66
- if all_estimates.length == @m
67
- estimate.round
68
- else # Correction for small sets
69
- (@m * Math.log(Float(@m)/(@m - all_estimates.length))).round
70
- end
71
- elsif estimate <= 2 ** 32 / 30.0
72
- estimate.round
73
- else # Correction for large sets
74
- (-2**32 * Math.log(1 - estimate/(2.0**32))).round
75
- end
76
- end
77
-
78
- def raw_union(counter_names)
79
- counter_names.map{ |counter_name| @redis.hgetall(counter_name).map{ |x,y| [x, y.to_i] } }
80
- .reduce(:concat)
81
- .group_by{ |key, count| key }
82
- .map{ |key, counters| [key, counters.map{ |x| x.last }.max] }
83
- end
84
-
85
- # rho(i) is the position of the first 1 in the binary representation of i,
86
- # reading from most significant to least significant bits. Some examples:
87
- # rho(1...) = 1, rho(001...) = 3, rho(000...0) = @bits_in_hash + 1
88
- def rho(i)
89
- if i == 0
90
- @bits_in_hash + 1
91
- else
92
- @bits_in_hash - Math.log(i, 2).floor
93
- end
94
- end
95
-
96
- end