hyperloglog-redis 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,224 +1,226 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe HyperLogLog do
4
-
5
- it "doesn't change its count when it sees values that it's already seen" do
6
- redis = Redis.new
7
- counter = HyperLogLog.new(redis, 10)
8
- test_set = (1..100).map{ |x| x.to_s }
9
- test_set.each{ |value| counter.add("mycounter", value) }
10
- original_estimate = counter.count("mycounter")
11
- 5.times do
12
- test_set.each do |value|
13
- counter.add("mycounter", value)
14
- counter.count("mycounter").should == original_estimate
4
+
5
+ [HyperLogLog::Counter, HyperLogLog::TimeSeriesCounter].each do |counter_type|
6
+
7
+ it "doesn't change its count when it sees values that it's already seen" do
8
+ redis = Redis.new
9
+ counter = counter_type.new(redis, 10)
10
+ test_set = (1..100).map{ |x| x.to_s }
11
+ test_set.each{ |value| counter.add("mycounter", value) }
12
+ original_estimate = counter.count("mycounter")
13
+ 5.times do
14
+ test_set.each do |value|
15
+ counter.add("mycounter", value)
16
+ counter.count("mycounter").should == original_estimate
17
+ end
15
18
  end
16
19
  end
17
- end
18
20
 
19
- it "can maintain more than one logically distinct counter" do
20
- redis = Redis.new
21
- counter = HyperLogLog.new(redis, 10)
22
- other_estimate = counter.count("counter2")
23
- (1..100).each do |i|
24
- counter.add("counter1", i.to_s)
25
- counter.count("counter2").should == other_estimate
21
+ it "can maintain more than one logically distinct counter" do
22
+ redis = Redis.new
23
+ counter = counter_type.new(redis, 10)
24
+ other_estimate = counter.count("counter2")
25
+ (1..100).each do |i|
26
+ counter.add("counter1", i.to_s)
27
+ counter.count("counter2").should == other_estimate
28
+ end
29
+ other_estimate = counter.count("counter1")
30
+ (101..200).each do |i|
31
+ counter.add("counter2", i.to_s)
32
+ counter.count("counter1").should == other_estimate
33
+ end
34
+ other_estimate = counter.count("counter2")
35
+ (201..300).each do |i|
36
+ counter.add("counter1", i.to_s)
37
+ counter.count("counter2").should == other_estimate
38
+ end
39
+ counter.count("counter1").should > 100
40
+ counter.count("counter2").should > 50
41
+ counter.count("counter1").should > counter.count("counter2")
26
42
  end
27
- other_estimate = counter.count("counter1")
28
- (101..200).each do |i|
29
- counter.add("counter2", i.to_s)
30
- counter.count("counter1").should == other_estimate
43
+
44
+ it "can exactly count small sets" do
45
+ redis = Redis.new
46
+ counter = counter_type.new(redis, 11)
47
+ 10.times { |i| counter.add("mycounter", i.to_s) }
48
+ counter.count("mycounter").should == 10
31
49
  end
32
- other_estimate = counter.count("counter2")
33
- (201..300).each do |i|
34
- counter.add("counter1", i.to_s)
35
- counter.count("counter2").should == other_estimate
36
- end
37
- counter.count("counter1").should > 100
38
- counter.count("counter2").should > 50
39
- counter.count("counter1").should > counter.count("counter2")
40
- end
41
-
42
- it "can exactly count small sets" do
43
- redis = Redis.new
44
- counter = HyperLogLog.new(redis, 11)
45
- 10.times { |i| counter.add("mycounter", i.to_s) }
46
- counter.count("mycounter").should == 10
47
- end
48
-
49
- it "can exactly count small unions" do
50
- redis = Redis.new
51
- counter = HyperLogLog.new(redis, 11)
52
- (1..8).each { |i| counter.add("mycounter1", i.to_s) }
53
- (5..12).each { |i| counter.add("mycounter2", i.to_s) }
54
- counter.union("mycounter1", "mycounter2").should == 12
55
- end
56
-
57
- it "can exactly count small intersections" do
58
- redis = Redis.new
59
- counter = HyperLogLog.new(redis, 11)
60
- (1..8).each { |i| counter.add("mycounter1", i.to_s) }
61
- (5..12).each { |i| counter.add("mycounter2", i.to_s) }
62
- counter.intersection("mycounter1", "mycounter2").should == 4
63
- end
64
-
65
- it "can store unions for querying later" do
66
- redis = Redis.new
67
- counter = HyperLogLog.new(redis, 11)
68
- (1..10).each { |i| counter.add("mycounter1", i.to_s) }
69
- (5..15).each { |i| counter.add("mycounter2", i.to_s) }
70
- (15..25).each { |i| counter.add("mycounter3", i.to_s) }
71
- (20..50).each { |i| counter.add("mycounter4", i.to_s) }
72
- counter.union_store("aggregate_counter", "mycounter1", "mycounter2", "mycounter3", "mycounter4")
73
- counter.union("mycounter1", "mycounter2", "mycounter3", "mycounter4").should == counter.count("aggregate_counter")
74
- end
75
-
76
- # With parameter b, HyperLogLog should produce estimates that have
77
- # relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
78
- # is based on assumptions that aren't necessarily true in practice and
79
- # the observed relative error will depend on the distribution of data
80
- # we receive as well as the interaction of the murmur hash implementation
81
- # with that data. Keeping that in mind, the following spec makes sure
82
- # that in the process of adding 1000 values to a set, HyperLogLog only
83
- # gives bad estimates (more than twice the expected relative error) in
84
- # less than 1% of the cases and never gives very bad estimates (more than
85
- # three times the expected relative error.)
86
- #
87
- # It's fine to fudge these numbers a little if the implementation changes,
88
- # since you can clearly find a different set of values that make this test
89
- # fail even without changing the implementation. But it should serve as a
90
- # good indication that there aren't any logical errors in the HyperLogLog
91
- # implementation, since it exercises all of the cases in HyperLogLog's
92
- # count method except for the correction for very large set sizes.
93
-
94
- it "produces acceptable estimates for counts" do
95
- max_items = 1000
96
- redis = Redis.new
97
- (6..16).each do |b|
98
- counter = HyperLogLog.new(redis, b)
99
- redis.del('mycounter')
50
+
51
+ it "can exactly count small unions" do
52
+ redis = Redis.new
53
+ counter = counter_type.new(redis, 11)
54
+ (1..8).each { |i| counter.add("mycounter1", i.to_s) }
55
+ (5..12).each { |i| counter.add("mycounter2", i.to_s) }
56
+ counter.union(["mycounter1", "mycounter2"]).should == 12
57
+ end
58
+
59
+ it "can exactly count small intersections" do
60
+ redis = Redis.new
61
+ counter = counter_type.new(redis, 11)
62
+ (1..8).each { |i| counter.add("mycounter1", i.to_s) }
63
+ (5..12).each { |i| counter.add("mycounter2", i.to_s) }
64
+ counter.intersection(["mycounter1", "mycounter2"]).should == 4
65
+ end
66
+
67
+ it "can store unions for querying later" do
68
+ redis = Redis.new
69
+ counter = counter_type.new(redis, 11)
70
+ (1..10).each { |i| counter.add("mycounter1", i.to_s) }
71
+ (5..15).each { |i| counter.add("mycounter2", i.to_s) }
72
+ (15..25).each { |i| counter.add("mycounter3", i.to_s) }
73
+ (20..50).each { |i| counter.add("mycounter4", i.to_s) }
74
+ counter.union_store("aggregate_counter", ["mycounter1", "mycounter2", "mycounter3", "mycounter4"])
75
+ counter.union(["mycounter1", "mycounter2", "mycounter3", "mycounter4"]).should == counter.count("aggregate_counter")
76
+ end
77
+
78
+ # With parameter b, HyperLogLog should produce estimates that have
79
+ # relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
80
+ # is based on assumptions that aren't necessarily true in practice and
81
+ # the observed relative error will depend on the distribution of data
82
+ # we receive as well as the interaction of the murmur hash implementation
83
+ # with that data. Keeping that in mind, the following spec makes sure
84
+ # that in the process of adding 1000 values to a set, HyperLogLog only
85
+ # gives bad estimates (more than twice the expected relative error) in
86
+ # less than 1% of the cases and never gives very bad estimates (more than
87
+ # three times the expected relative error.)
88
+ #
89
+ # It's fine to fudge these numbers a little if the implementation changes,
90
+ # since you can clearly find a different set of values that make this test
91
+ # fail even without changing the implementation. But it should serve as a
92
+ # good indication that there aren't any logical errors in the HyperLogLog
93
+ # implementation, since it exercises all of the cases in HyperLogLog's
94
+ # count method except for the correction for very large set sizes.
95
+
96
+ it "produces acceptable estimates for counts" do
97
+ max_items = 1000
98
+ redis = Redis.new
99
+ (6..16).each do |b|
100
+ counter = counter_type.new(redis, b)
101
+ redis.del('mycounter')
102
+ bad_estimates = 0
103
+ very_bad_estimates = 0
104
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
105
+ max_items.times do |i|
106
+ value = Digest::MD5.hexdigest("value#{i}")
107
+ counter.add("mycounter", value)
108
+ actual = i + 1
109
+ approximate = counter.count("mycounter")
110
+ relative_error = (actual - approximate).abs / Float(actual)
111
+ bad_estimates += 1 if relative_error > expected_relative_error * 2
112
+ very_bad_estimates += 1 if relative_error > expected_relative_error * 3
113
+ end
114
+ bad_estimates.should < max_items / 100.00
115
+ very_bad_estimates.should == 0
116
+ end
117
+ end
118
+
119
+ it "produces acceptable estimates for unions with few elements in common" do
120
+ b, max_items = 10, 2000
121
+ counter = counter_type.new(Redis.new, b)
100
122
  bad_estimates = 0
101
123
  very_bad_estimates = 0
102
124
  expected_relative_error = 1.04 / Math.sqrt(2 ** b)
103
125
  max_items.times do |i|
104
- value = Digest::MD5.hexdigest("value#{i}")
105
- counter.add("mycounter", value)
106
- actual = i + 1
107
- approximate = counter.count("mycounter")
126
+ value1 = Digest::MD5.hexdigest("value#{i}")
127
+ counter.add("mycounter1", value1)
128
+ value2 = Digest::MD5.hexdigest("value#{i}incounter2")
129
+ counter.add("mycounter2", value2)
130
+ value3 = Digest::MD5.hexdigest("this is value#{i}")
131
+ counter.add("mycounter3", value3)
132
+ actual = 3 * (i + 1)
133
+ approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
108
134
  relative_error = (actual - approximate).abs / Float(actual)
109
135
  bad_estimates += 1 if relative_error > expected_relative_error * 2
110
136
  very_bad_estimates += 1 if relative_error > expected_relative_error * 3
111
137
  end
112
- bad_estimates.should < max_items / 100.00
138
+ bad_estimates.should < (3 * max_items) / 100.00
113
139
  very_bad_estimates.should == 0
114
140
  end
115
- end
116
-
117
- it "produces acceptable estimates for unions with few elements in common" do
118
- b, max_items = 10, 2000
119
- counter = HyperLogLog.new(Redis.new, b)
120
- bad_estimates = 0
121
- very_bad_estimates = 0
122
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
123
- max_items.times do |i|
124
- value1 = Digest::MD5.hexdigest("value#{i}")
125
- counter.add("mycounter1", value1)
126
- value2 = Digest::MD5.hexdigest("value#{i}incounter2")
127
- counter.add("mycounter2", value2)
128
- value3 = Digest::MD5.hexdigest("this is value#{i}")
129
- counter.add("mycounter3", value3)
130
- actual = 3 * (i + 1)
131
- approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
132
- relative_error = (actual - approximate).abs / Float(actual)
133
- bad_estimates += 1 if relative_error > expected_relative_error * 2
134
- very_bad_estimates += 1 if relative_error > expected_relative_error * 3
135
- end
136
- bad_estimates.should < (3 * max_items) / 100.00
137
- very_bad_estimates.should == 0
138
- end
139
-
140
- it "produces acceptable estimates for unions with many elements in common" do
141
- b, max_items, intersection_size = 10, 1000, 2000
142
- counter = HyperLogLog.new(Redis.new, b)
143
- bad_estimates = 0
144
- very_bad_estimates = 0
145
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
146
-
147
- intersection_size.times do |i|
148
- value = Digest::MD5.hexdigest("test#{i}value")
149
- ['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
150
- counter.add(counter_name, value)
141
+
142
+ it "produces acceptable estimates for unions with many elements in common" do
143
+ b, max_items, intersection_size = 10, 1000, 2000
144
+ counter = counter_type.new(Redis.new, b)
145
+ bad_estimates = 0
146
+ very_bad_estimates = 0
147
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
148
+
149
+ intersection_size.times do |i|
150
+ value = Digest::MD5.hexdigest("test#{i}value")
151
+ ['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
152
+ counter.add(counter_name, value)
153
+ end
151
154
  end
155
+
156
+ max_items.times do |i|
157
+ value1 = Digest::MD5.hexdigest("value#{i}")
158
+ counter.add("mycounter1", value1)
159
+ value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
160
+ counter.add("mycounter2", value2)
161
+ value3 = Digest::MD5.hexdigest("this is value#{i}")
162
+ counter.add("mycounter3", value3)
163
+ actual = 3 * (i + 1) + intersection_size
164
+ approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
165
+ relative_error = (actual - approximate).abs / Float(actual)
166
+ bad_estimates += 1 if relative_error > expected_relative_error * 2
167
+ very_bad_estimates += 1 if relative_error > expected_relative_error * 3
168
+ end
169
+
170
+ bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
171
+ very_bad_estimates.should == 0
152
172
  end
153
173
 
154
- max_items.times do |i|
155
- value1 = Digest::MD5.hexdigest("value#{i}")
156
- counter.add("mycounter1", value1)
157
- value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
158
- counter.add("mycounter2", value2)
159
- value3 = Digest::MD5.hexdigest("this is value#{i}")
160
- counter.add("mycounter3", value3)
161
- actual = 3 * (i + 1) + intersection_size
162
- approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
163
- relative_error = (actual - approximate).abs / Float(actual)
164
- bad_estimates += 1 if relative_error > expected_relative_error * 2
165
- very_bad_estimates += 1 if relative_error > expected_relative_error * 3
166
- end
167
-
168
- bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
169
- very_bad_estimates.should == 0
170
- end
171
-
172
- # There are no good theoretical guarantees that I know of for arbitrary
173
- # intersection estimation, since it's expressed as the sum of unions of
174
- # HyperLogLog counters, but it tends to work okay in practice, as seen below.
175
-
176
- it "produces decent estimates for intersections" do
177
- b, max_items = 6, 1000
178
- counter = HyperLogLog.new(Redis.new, b)
179
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
180
-
181
- max_items.times do |i|
182
- value1 = Digest::MD5.hexdigest("first-value#{i}")
183
- value2 = Digest::MD5.hexdigest("second-value#{i}")
184
- value3 = Digest::MD5.hexdigest("third-value#{i}")
185
- value4 = Digest::MD5.hexdigest("fourth-value#{i}")
186
- counter.add("mycounter1", value1)
187
- counter.add("mycounter2", value2)
188
- counter.add("mycounter3", value3)
189
- counter.add("mycounter4", value4)
190
- [value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
191
- end
192
-
193
- small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
174
+ # There are no good theoretical guarantees that I know of for arbitrary
175
+ # intersection estimation, since it's expressed as the sum of unions of
176
+ # HyperLogLog counters, but it tends to work okay in practice, as seen below.
194
177
 
195
- small_counters.each do |counter_name|
196
- intersection_estimate = counter.intersection(counter_name, 'mycounter5')
197
- intersection_estimate.should > 0
198
- (intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
199
- end
200
-
201
- [2,3].each do |intersection_size|
202
- small_counters.combination(intersection_size).each do |counter_names|
203
- intersection_estimate = counter.intersection(*counter_names)
204
- intersection_estimate.should >= 0
205
- intersection_estimate.should < intersection_size * max_items * expected_relative_error
178
+ it "produces decent estimates for intersections" do
179
+ b, max_items = 6, 1000
180
+ counter = counter_type.new(Redis.new, b)
181
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
182
+
183
+ max_items.times do |i|
184
+ value1 = Digest::MD5.hexdigest("first-value#{i}")
185
+ value2 = Digest::MD5.hexdigest("second-value#{i}")
186
+ value3 = Digest::MD5.hexdigest("third-value#{i}")
187
+ value4 = Digest::MD5.hexdigest("fourth-value#{i}")
188
+ counter.add("mycounter1", value1)
189
+ counter.add("mycounter2", value2)
190
+ counter.add("mycounter3", value3)
191
+ counter.add("mycounter4", value4)
192
+ [value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
206
193
  end
207
- end
208
-
209
- 100.times do |i|
210
- value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
211
- small_counters.each { |counter_name| counter.add(counter_name, value) }
212
- end
213
-
214
- [2,3,4].each do |intersection_size|
215
- small_counters.combination(intersection_size).each do |counter_names|
216
- intersection_estimate = counter.intersection(*counter_names)
217
- intersection_estimate.should >= 0
218
- (intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
194
+
195
+ small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
196
+
197
+ small_counters.each do |counter_name|
198
+ intersection_estimate = counter.intersection([counter_name, 'mycounter5'])
199
+ intersection_estimate.should > 0
200
+ (intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
201
+ end
202
+
203
+ [2,3].each do |intersection_size|
204
+ small_counters.combination(intersection_size).each do |counter_names|
205
+ intersection_estimate = counter.intersection(counter_names)
206
+ intersection_estimate.should >= 0
207
+ intersection_estimate.should < intersection_size * max_items * expected_relative_error
208
+ end
219
209
  end
210
+
211
+ 100.times do |i|
212
+ value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
213
+ small_counters.each { |counter_name| counter.add(counter_name, value) }
214
+ end
215
+
216
+ [2,3,4].each do |intersection_size|
217
+ small_counters.combination(intersection_size).each do |counter_names|
218
+ intersection_estimate = counter.intersection(counter_names)
219
+ intersection_estimate.should >= 0
220
+ (intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
221
+ end
222
+ end
223
+
220
224
  end
221
-
222
225
  end
223
-
224
226
  end
@@ -0,0 +1,216 @@
1
+ require 'securerandom'
2
+ require 'timecop'
3
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
+
5
+ MINUTES=60
6
+ HOURS=MINUTES*60
7
+ DAYS=HOURS*24
8
+ WEEKS=DAYS*7
9
+
10
+ describe HyperLogLog::TimeSeriesCounter do
11
+
12
+ before(:each) do
13
+ @b = 11
14
+ @redis = Redis.new
15
+ @counter = HyperLogLog::TimeSeriesCounter.new(@redis, @b)
16
+ @expected_relative_error = 1.04 / Math.sqrt(2 ** @b)
17
+
18
+ def counter_should_equal(counter_val, expected_val, relative_error_base=nil)
19
+ (counter_val - expected_val).abs.should <= (relative_error_base || expected_val) * @expected_relative_error
20
+ end
21
+ end
22
+
23
+ it "can estimate cardinalities from any particular point in time until the present" do
24
+ Timecop.travel(Time.now - 2 * WEEKS) do
25
+ (0..100).each { |i| @counter.add('mycounter', "item#{i}") }
26
+ end
27
+ Timecop.travel(Time.now - 1 * WEEKS) do
28
+ (100..200).each { |i| @counter.add('mycounter', "item#{i}") }
29
+ end
30
+ Timecop.travel(Time.now - 6 * DAYS) do
31
+ (0..100).each { |i| @counter.add('mycounter', "item#{i}") }
32
+ end
33
+ Timecop.travel(Time.now - 5 * DAYS) do
34
+ (100..200).each { |i| @counter.add('mycounter', "item#{i}") }
35
+ end
36
+ Timecop.travel(Time.now - 4 * DAYS) do
37
+ (200..250).each { |i| @counter.add('mycounter', "item#{i}") }
38
+ end
39
+
40
+ counter_should_equal(@counter.count('mycounter'), 250)
41
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * WEEKS), 250)
42
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
43
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS), 250)
44
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
45
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
46
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 0, 250)
47
+ end
48
+
49
+ it "can estimate unions from any particular point in time until the present" do
50
+ Timecop.travel(Time.now - 2 * WEEKS) do
51
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
52
+ end
53
+ Timecop.travel(Time.now - 1 * WEEKS) do
54
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
55
+ end
56
+ Timecop.travel(Time.now - 6 * DAYS) do
57
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
58
+ end
59
+ Timecop.travel(Time.now - 5 * DAYS) do
60
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
61
+ end
62
+ Timecop.travel(Time.now - 4 * DAYS) do
63
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
64
+ end
65
+
66
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2']), 250)
67
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
68
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
69
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS), 250)
70
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
71
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
72
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS), 0, 250)
73
+ end
74
+
75
+ it "can estimate intersections from any particular point in time until the present" do
76
+ Timecop.travel(Time.now - 2 * WEEKS) do
77
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
78
+ end
79
+ Timecop.travel(Time.now - 1 * WEEKS) do
80
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
81
+ end
82
+ Timecop.travel(Time.now - 6 * DAYS) do
83
+ (0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
84
+ end
85
+ Timecop.travel(Time.now - 5 * DAYS) do
86
+ (100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
87
+ end
88
+ Timecop.travel(Time.now - 4 * DAYS) do
89
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
90
+ end
91
+ Timecop.travel(Time.now - 3 * DAYS) do
92
+ (200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
93
+ end
94
+
95
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2']), 250)
96
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
97
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 150, 250)
98
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 6 * DAYS - 12 * HOURS), 50, 250)
99
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 50, 250)
100
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
101
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS - 12 * HOURS), 0, 250)
102
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 2 * DAYS), 0, 250)
103
+ end
104
+
105
+ it "can use union_store to store snapshots of counters at particular points in time" do
106
+ Timecop.travel(Time.now - 2 * WEEKS) do
107
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
108
+ end
109
+ Timecop.travel(Time.now - 1 * WEEKS) do
110
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
111
+ end
112
+ Timecop.travel(Time.now - 6 * DAYS) do
113
+ (0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
114
+ end
115
+ Timecop.travel(Time.now - 5 * DAYS) do
116
+ (100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
117
+ end
118
+ Timecop.travel(Time.now - 4 * DAYS) do
119
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
120
+ end
121
+ Timecop.travel(Time.now - 3 * DAYS) do
122
+ (200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
123
+ end
124
+
125
+ @counter.union_store('counter1_1_week_ago', ['mycounter1'], Time.now.to_i - 1 * WEEKS)
126
+ @counter.union_store('counter2_5_days_ago', ['mycounter2'], Time.now.to_i - 5 * DAYS)
127
+ counter_should_equal(@counter.union(['counter1_1_week_ago', 'counter2_5_days_ago']), 150, 250)
128
+ end
129
+
130
+ it "allows you to override the time an event is registered when it's added" do
131
+ (0..1000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 3 * WEEKS) }
132
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 2 * WEEKS) }
133
+ (2000..3000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
134
+ (3000..4000).each { |i| @counter.add('mycounter', "item#{i}") }
135
+
136
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
137
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
138
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
139
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
140
+ end
141
+
142
+ it "doesn't screw up more recent counts when items are injected with earlier timestamp overrides" do
143
+ Timecop.travel(Time.now - 3 * WEEKS) do
144
+ (0..1000).each { |i| @counter.add('mycounter', "item#{i}") }
145
+ end
146
+
147
+ Timecop.travel(Time.now - 2 * WEEKS) do
148
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
149
+ end
150
+
151
+ Timecop.travel(Time.now - 1 * WEEKS) do
152
+ (2000..3000).each { |i| @counter.add('mycounter', "item#{i}") }
153
+ end
154
+
155
+ Timecop.travel(Time.now - 2 * DAYS) do
156
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
157
+ end
158
+
159
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
160
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
161
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
162
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
163
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
164
+
165
+ # Shouldn't change counts, since they're updates to counts that happen later
166
+ # than the time we're trying to inject
167
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
168
+
169
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
170
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
171
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
172
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
173
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
174
+
175
+ # Should change counts, since they're updates to counts for items we've never
176
+ # seen before in the past
177
+ (3000..4000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
178
+
179
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
180
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
181
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 3000)
182
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
183
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
184
+ end
185
+
186
+ it "can compute deltas over time on events correctly" do
187
+ # A larger-scale test that simulates user join events and tests that we can get
188
+ # week-by-week deltas. Generate new user counts according to the following
189
+ # weekly schedule: 55780 during the first week, 300 more during the next week,
190
+ # 10 more the next week, etc.
191
+
192
+ schedule = [55780, 300, 10, 4000, 1000, 1000, 5000, 15000, 30000, 3000]
193
+ schedule.each_with_index do |num_users, i|
194
+ Timecop.travel(Time.now - (schedule.length * WEEKS) + (i * WEEKS)) do
195
+ num_users.times do |i|
196
+ Timecop.travel(Time.now + 2 * HOURS + i) do
197
+ @counter.add("users", "user#{SecureRandom.uuid}")
198
+ end
199
+ end
200
+ end
201
+ end
202
+
203
+ actual_total = schedule.reduce(:+)
204
+ estimated_total = @counter.count("users")
205
+ (actual_total - estimated_total).abs.should < @expected_relative_error * actual_total
206
+
207
+ # Go through the schedule, computing week-by-week deltas and comparing them to the
208
+ # scheduled additions.
209
+
210
+ schedule.each_with_index do |users_joined, i|
211
+ week = schedule.length - 1 - i
212
+ c = @counter.count('users', Time.now.to_i - (week+1) * WEEKS) - @counter.count('users', Time.now.to_i - week * WEEKS)
213
+ (users_joined - c).abs.should < @expected_relative_error * schedule.reduce(:+)
214
+ end
215
+ end
216
+ end