hyperloglog-redis 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hyperloglog-redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-26 00:00:00.000000000Z
12
+ date: 2012-11-30 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: murmurhash3
16
- requirement: &2172774780 !ruby/object:Gem::Requirement
16
+ requirement: &2173180560 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.1.3
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2172774780
24
+ version_requirements: *2173180560
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &2172774300 !ruby/object:Gem::Requirement
27
+ requirement: &2173180080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 3.0.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2172774300
35
+ version_requirements: *2173180080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &2172773820 !ruby/object:Gem::Requirement
38
+ requirement: &2173179600 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.8.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2172773820
46
+ version_requirements: *2173179600
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
- requirement: &2172773340 !ruby/object:Gem::Requirement
49
+ requirement: &2173179120 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.9.2.2
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2172773340
57
+ version_requirements: *2173179120
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &2172789220 !ruby/object:Gem::Requirement
60
+ requirement: &2173178640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,18 @@ dependencies:
65
65
  version: 2.11.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2172789220
68
+ version_requirements: *2173178640
69
+ - !ruby/object:Gem::Dependency
70
+ name: timecop
71
+ requirement: &2173178160 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: 0.5.3
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *2173178160
69
80
  description: An implementation of the HyperLogLog set cardinality estimation algorithm
70
81
  in Ruby using Redis as a back-end
71
82
  email: aaron.windsor@gmail.com
@@ -85,10 +96,13 @@ files:
85
96
  - Rakefile
86
97
  - VERSION
87
98
  - hyperloglog-redis.gemspec
88
- - lib/hyper_log_log.rb
99
+ - lib/algorithm.rb
100
+ - lib/counter.rb
89
101
  - lib/hyperloglog-redis.rb
102
+ - lib/time_series_counter.rb
90
103
  - spec/hyper_log_log_spec.rb
91
104
  - spec/spec_helper.rb
105
+ - spec/time_series_counter_spec.rb
92
106
  homepage: http://github.com/aaw/hyperloglog-redis
93
107
  licenses:
94
108
  - MIT
@@ -104,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
104
118
  version: '0'
105
119
  segments:
106
120
  - 0
107
- hash: 2426569210961737114
121
+ hash: -3898531581503867473
108
122
  required_rubygems_version: !ruby/object:Gem::Requirement
109
123
  none: false
110
124
  requirements:
@@ -1,96 +0,0 @@
1
- require 'redis'
2
- require 'murmurhash3'
3
-
4
- class HyperLogLog
5
- def initialize(redis, b=10)
6
- raise "Accuracy not supported. Please choose a value of b between 4 and 16" if b < 4 || b > 16
7
- @redis = redis
8
- @bits_in_hash = 32 - b
9
- @m = (2 ** b).to_i
10
- if @m == 16
11
- @alpha = 0.673
12
- elsif @m == 32
13
- @alpha = 0.697
14
- elsif @m == 64
15
- @alpha = 0.709
16
- else
17
- @alpha = 0.7213/(1 + 1.079/@m)
18
- end
19
- end
20
-
21
- def add(counter_name, value)
22
- hash = MurmurHash3::V32.murmur3_32_str_hash(value)
23
- function_name = hash % @m
24
- w = hash / @m
25
- existing_value = (@redis.hget(counter_name, function_name) || 0).to_i
26
- new_value = [existing_value, rho(w)].max
27
- @redis.hset(counter_name, function_name, new_value) if new_value > existing_value
28
- end
29
-
30
- # Estimate the cardinality of a single set
31
- def count(counter_name)
32
- union_helper([counter_name])
33
- end
34
-
35
- # Estimate the cardinality of the union of several sets
36
- def union(*counter_names)
37
- union_helper(counter_names)
38
- end
39
-
40
- # Store the union of several sets in *destination* so that it can be used as
41
- # a HyperLogLog counter later.
42
- def union_store(destination, *counter_names)
43
- raw_union(counter_names).each do |key, count|
44
- @redis.hset(destination, key, count)
45
- end
46
- end
47
-
48
- # Estimate the cardinality of the intersection of several sets. We do this by
49
- # using the principle of inclusion and exclusion to represent the size of the
50
- # intersection as the alternating sum of an exponential number of
51
- # cardinalities of unions of smaller sets.
52
- def intersection(*counter_names)
53
- icount = (1..counter_names.length).map do |k|
54
- counter_names.combination(k).map do |group|
55
- ((k % 2 == 0) ? -1 : 1) * union_helper(group)
56
- end.inject(0, :+)
57
- end.inject(0, :+)
58
- [icount, 0].max
59
- end
60
-
61
- def union_helper(counter_names)
62
- all_estimates = raw_union(counter_names).map{ |value, score| 2 ** -score }
63
- estimate_sum = all_estimates.reduce(:+) || 0
64
- estimate = @alpha * @m * @m * ((estimate_sum + @m - all_estimates.length) ** -1)
65
- if estimate <= 2.5 * @m
66
- if all_estimates.length == @m
67
- estimate.round
68
- else # Correction for small sets
69
- (@m * Math.log(Float(@m)/(@m - all_estimates.length))).round
70
- end
71
- elsif estimate <= 2 ** 32 / 30.0
72
- estimate.round
73
- else # Correction for large sets
74
- (-2**32 * Math.log(1 - estimate/(2.0**32))).round
75
- end
76
- end
77
-
78
- def raw_union(counter_names)
79
- counter_names.map{ |counter_name| @redis.hgetall(counter_name).map{ |x,y| [x, y.to_i] } }
80
- .reduce(:concat)
81
- .group_by{ |key, count| key }
82
- .map{ |key, counters| [key, counters.map{ |x| x.last }.max] }
83
- end
84
-
85
- # rho(i) is the position of the first 1 in the binary representation of i,
86
- # reading from most significant to least significant bits. Some examples:
87
- # rho(1...) = 1, rho(001...) = 3, rho(000...0) = @bits_in_hash + 1
88
- def rho(i)
89
- if i == 0
90
- @bits_in_hash + 1
91
- else
92
- @bits_in_hash - Math.log(i, 2).floor
93
- end
94
- end
95
-
96
- end