hyperloglog-redis 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,224 +1,226 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe HyperLogLog do
4
-
5
- it "doesn't change its count when it sees values that it's already seen" do
6
- redis = Redis.new
7
- counter = HyperLogLog.new(redis, 10)
8
- test_set = (1..100).map{ |x| x.to_s }
9
- test_set.each{ |value| counter.add("mycounter", value) }
10
- original_estimate = counter.count("mycounter")
11
- 5.times do
12
- test_set.each do |value|
13
- counter.add("mycounter", value)
14
- counter.count("mycounter").should == original_estimate
4
+
5
+ [HyperLogLog::Counter, HyperLogLog::TimeSeriesCounter].each do |counter_type|
6
+
7
+ it "doesn't change its count when it sees values that it's already seen" do
8
+ redis = Redis.new
9
+ counter = counter_type.new(redis, 10)
10
+ test_set = (1..100).map{ |x| x.to_s }
11
+ test_set.each{ |value| counter.add("mycounter", value) }
12
+ original_estimate = counter.count("mycounter")
13
+ 5.times do
14
+ test_set.each do |value|
15
+ counter.add("mycounter", value)
16
+ counter.count("mycounter").should == original_estimate
17
+ end
15
18
  end
16
19
  end
17
- end
18
20
 
19
- it "can maintain more than one logically distinct counter" do
20
- redis = Redis.new
21
- counter = HyperLogLog.new(redis, 10)
22
- other_estimate = counter.count("counter2")
23
- (1..100).each do |i|
24
- counter.add("counter1", i.to_s)
25
- counter.count("counter2").should == other_estimate
21
+ it "can maintain more than one logically distinct counter" do
22
+ redis = Redis.new
23
+ counter = counter_type.new(redis, 10)
24
+ other_estimate = counter.count("counter2")
25
+ (1..100).each do |i|
26
+ counter.add("counter1", i.to_s)
27
+ counter.count("counter2").should == other_estimate
28
+ end
29
+ other_estimate = counter.count("counter1")
30
+ (101..200).each do |i|
31
+ counter.add("counter2", i.to_s)
32
+ counter.count("counter1").should == other_estimate
33
+ end
34
+ other_estimate = counter.count("counter2")
35
+ (201..300).each do |i|
36
+ counter.add("counter1", i.to_s)
37
+ counter.count("counter2").should == other_estimate
38
+ end
39
+ counter.count("counter1").should > 100
40
+ counter.count("counter2").should > 50
41
+ counter.count("counter1").should > counter.count("counter2")
26
42
  end
27
- other_estimate = counter.count("counter1")
28
- (101..200).each do |i|
29
- counter.add("counter2", i.to_s)
30
- counter.count("counter1").should == other_estimate
43
+
44
+ it "can exactly count small sets" do
45
+ redis = Redis.new
46
+ counter = counter_type.new(redis, 11)
47
+ 10.times { |i| counter.add("mycounter", i.to_s) }
48
+ counter.count("mycounter").should == 10
31
49
  end
32
- other_estimate = counter.count("counter2")
33
- (201..300).each do |i|
34
- counter.add("counter1", i.to_s)
35
- counter.count("counter2").should == other_estimate
36
- end
37
- counter.count("counter1").should > 100
38
- counter.count("counter2").should > 50
39
- counter.count("counter1").should > counter.count("counter2")
40
- end
41
-
42
- it "can exactly count small sets" do
43
- redis = Redis.new
44
- counter = HyperLogLog.new(redis, 11)
45
- 10.times { |i| counter.add("mycounter", i.to_s) }
46
- counter.count("mycounter").should == 10
47
- end
48
-
49
- it "can exactly count small unions" do
50
- redis = Redis.new
51
- counter = HyperLogLog.new(redis, 11)
52
- (1..8).each { |i| counter.add("mycounter1", i.to_s) }
53
- (5..12).each { |i| counter.add("mycounter2", i.to_s) }
54
- counter.union("mycounter1", "mycounter2").should == 12
55
- end
56
-
57
- it "can exactly count small intersections" do
58
- redis = Redis.new
59
- counter = HyperLogLog.new(redis, 11)
60
- (1..8).each { |i| counter.add("mycounter1", i.to_s) }
61
- (5..12).each { |i| counter.add("mycounter2", i.to_s) }
62
- counter.intersection("mycounter1", "mycounter2").should == 4
63
- end
64
-
65
- it "can store unions for querying later" do
66
- redis = Redis.new
67
- counter = HyperLogLog.new(redis, 11)
68
- (1..10).each { |i| counter.add("mycounter1", i.to_s) }
69
- (5..15).each { |i| counter.add("mycounter2", i.to_s) }
70
- (15..25).each { |i| counter.add("mycounter3", i.to_s) }
71
- (20..50).each { |i| counter.add("mycounter4", i.to_s) }
72
- counter.union_store("aggregate_counter", "mycounter1", "mycounter2", "mycounter3", "mycounter4")
73
- counter.union("mycounter1", "mycounter2", "mycounter3", "mycounter4").should == counter.count("aggregate_counter")
74
- end
75
-
76
- # With parameter b, HyperLogLog should produce estimates that have
77
- # relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
78
- # is based on assumptions that aren't necessarily true in practice and
79
- # the observed relative error will depend on the distribution of data
80
- # we receive as well as the interaction of the murmur hash implementation
81
- # with that data. Keeping that in mind, the following spec makes sure
82
- # that in the process of adding 1000 values to a set, HyperLogLog only
83
- # gives bad estimates (more than twice the expected relative error) in
84
- # less than 1% of the cases and never gives very bad estimates (more than
85
- # three times the expected relative error.)
86
- #
87
- # It's fine to fudge these numbers a little if the implementation changes,
88
- # since you can clearly find a different set of values that make this test
89
- # fail even without changing the implementation. But it should serve as a
90
- # good indication that there aren't any logical errors in the HyperLogLog
91
- # implementation, since it exercises all of the cases in HyperLogLog's
92
- # count method except for the correction for very large set sizes.
93
-
94
- it "produces acceptable estimates for counts" do
95
- max_items = 1000
96
- redis = Redis.new
97
- (6..16).each do |b|
98
- counter = HyperLogLog.new(redis, b)
99
- redis.del('mycounter')
50
+
51
+ it "can exactly count small unions" do
52
+ redis = Redis.new
53
+ counter = counter_type.new(redis, 11)
54
+ (1..8).each { |i| counter.add("mycounter1", i.to_s) }
55
+ (5..12).each { |i| counter.add("mycounter2", i.to_s) }
56
+ counter.union(["mycounter1", "mycounter2"]).should == 12
57
+ end
58
+
59
+ it "can exactly count small intersections" do
60
+ redis = Redis.new
61
+ counter = counter_type.new(redis, 11)
62
+ (1..8).each { |i| counter.add("mycounter1", i.to_s) }
63
+ (5..12).each { |i| counter.add("mycounter2", i.to_s) }
64
+ counter.intersection(["mycounter1", "mycounter2"]).should == 4
65
+ end
66
+
67
+ it "can store unions for querying later" do
68
+ redis = Redis.new
69
+ counter = counter_type.new(redis, 11)
70
+ (1..10).each { |i| counter.add("mycounter1", i.to_s) }
71
+ (5..15).each { |i| counter.add("mycounter2", i.to_s) }
72
+ (15..25).each { |i| counter.add("mycounter3", i.to_s) }
73
+ (20..50).each { |i| counter.add("mycounter4", i.to_s) }
74
+ counter.union_store("aggregate_counter", ["mycounter1", "mycounter2", "mycounter3", "mycounter4"])
75
+ counter.union(["mycounter1", "mycounter2", "mycounter3", "mycounter4"]).should == counter.count("aggregate_counter")
76
+ end
77
+
78
+ # With parameter b, HyperLogLog should produce estimates that have
79
+ # relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
80
+ # is based on assumptions that aren't necessarily true in practice and
81
+ # the observed relative error will depend on the distribution of data
82
+ # we receive as well as the interaction of the murmur hash implementation
83
+ # with that data. Keeping that in mind, the following spec makes sure
84
+ # that in the process of adding 1000 values to a set, HyperLogLog only
85
+ # gives bad estimates (more than twice the expected relative error) in
86
+ # less than 1% of the cases and never gives very bad estimates (more than
87
+ # three times the expected relative error.)
88
+ #
89
+ # It's fine to fudge these numbers a little if the implementation changes,
90
+ # since you can clearly find a different set of values that make this test
91
+ # fail even without changing the implementation. But it should serve as a
92
+ # good indication that there aren't any logical errors in the HyperLogLog
93
+ # implementation, since it exercises all of the cases in HyperLogLog's
94
+ # count method except for the correction for very large set sizes.
95
+
96
+ it "produces acceptable estimates for counts" do
97
+ max_items = 1000
98
+ redis = Redis.new
99
+ (6..16).each do |b|
100
+ counter = counter_type.new(redis, b)
101
+ redis.del('mycounter')
102
+ bad_estimates = 0
103
+ very_bad_estimates = 0
104
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
105
+ max_items.times do |i|
106
+ value = Digest::MD5.hexdigest("value#{i}")
107
+ counter.add("mycounter", value)
108
+ actual = i + 1
109
+ approximate = counter.count("mycounter")
110
+ relative_error = (actual - approximate).abs / Float(actual)
111
+ bad_estimates += 1 if relative_error > expected_relative_error * 2
112
+ very_bad_estimates += 1 if relative_error > expected_relative_error * 3
113
+ end
114
+ bad_estimates.should < max_items / 100.00
115
+ very_bad_estimates.should == 0
116
+ end
117
+ end
118
+
119
+ it "produces acceptable estimates for unions with few elements in common" do
120
+ b, max_items = 10, 2000
121
+ counter = counter_type.new(Redis.new, b)
100
122
  bad_estimates = 0
101
123
  very_bad_estimates = 0
102
124
  expected_relative_error = 1.04 / Math.sqrt(2 ** b)
103
125
  max_items.times do |i|
104
- value = Digest::MD5.hexdigest("value#{i}")
105
- counter.add("mycounter", value)
106
- actual = i + 1
107
- approximate = counter.count("mycounter")
126
+ value1 = Digest::MD5.hexdigest("value#{i}")
127
+ counter.add("mycounter1", value1)
128
+ value2 = Digest::MD5.hexdigest("value#{i}incounter2")
129
+ counter.add("mycounter2", value2)
130
+ value3 = Digest::MD5.hexdigest("this is value#{i}")
131
+ counter.add("mycounter3", value3)
132
+ actual = 3 * (i + 1)
133
+ approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
108
134
  relative_error = (actual - approximate).abs / Float(actual)
109
135
  bad_estimates += 1 if relative_error > expected_relative_error * 2
110
136
  very_bad_estimates += 1 if relative_error > expected_relative_error * 3
111
137
  end
112
- bad_estimates.should < max_items / 100.00
138
+ bad_estimates.should < (3 * max_items) / 100.00
113
139
  very_bad_estimates.should == 0
114
140
  end
115
- end
116
-
117
- it "produces acceptable estimates for unions with few elements in common" do
118
- b, max_items = 10, 2000
119
- counter = HyperLogLog.new(Redis.new, b)
120
- bad_estimates = 0
121
- very_bad_estimates = 0
122
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
123
- max_items.times do |i|
124
- value1 = Digest::MD5.hexdigest("value#{i}")
125
- counter.add("mycounter1", value1)
126
- value2 = Digest::MD5.hexdigest("value#{i}incounter2")
127
- counter.add("mycounter2", value2)
128
- value3 = Digest::MD5.hexdigest("this is value#{i}")
129
- counter.add("mycounter3", value3)
130
- actual = 3 * (i + 1)
131
- approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
132
- relative_error = (actual - approximate).abs / Float(actual)
133
- bad_estimates += 1 if relative_error > expected_relative_error * 2
134
- very_bad_estimates += 1 if relative_error > expected_relative_error * 3
135
- end
136
- bad_estimates.should < (3 * max_items) / 100.00
137
- very_bad_estimates.should == 0
138
- end
139
-
140
- it "produces acceptable estimates for unions with many elements in common" do
141
- b, max_items, intersection_size = 10, 1000, 2000
142
- counter = HyperLogLog.new(Redis.new, b)
143
- bad_estimates = 0
144
- very_bad_estimates = 0
145
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
146
-
147
- intersection_size.times do |i|
148
- value = Digest::MD5.hexdigest("test#{i}value")
149
- ['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
150
- counter.add(counter_name, value)
141
+
142
+ it "produces acceptable estimates for unions with many elements in common" do
143
+ b, max_items, intersection_size = 10, 1000, 2000
144
+ counter = counter_type.new(Redis.new, b)
145
+ bad_estimates = 0
146
+ very_bad_estimates = 0
147
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
148
+
149
+ intersection_size.times do |i|
150
+ value = Digest::MD5.hexdigest("test#{i}value")
151
+ ['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
152
+ counter.add(counter_name, value)
153
+ end
151
154
  end
155
+
156
+ max_items.times do |i|
157
+ value1 = Digest::MD5.hexdigest("value#{i}")
158
+ counter.add("mycounter1", value1)
159
+ value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
160
+ counter.add("mycounter2", value2)
161
+ value3 = Digest::MD5.hexdigest("this is value#{i}")
162
+ counter.add("mycounter3", value3)
163
+ actual = 3 * (i + 1) + intersection_size
164
+ approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
165
+ relative_error = (actual - approximate).abs / Float(actual)
166
+ bad_estimates += 1 if relative_error > expected_relative_error * 2
167
+ very_bad_estimates += 1 if relative_error > expected_relative_error * 3
168
+ end
169
+
170
+ bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
171
+ very_bad_estimates.should == 0
152
172
  end
153
173
 
154
- max_items.times do |i|
155
- value1 = Digest::MD5.hexdigest("value#{i}")
156
- counter.add("mycounter1", value1)
157
- value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
158
- counter.add("mycounter2", value2)
159
- value3 = Digest::MD5.hexdigest("this is value#{i}")
160
- counter.add("mycounter3", value3)
161
- actual = 3 * (i + 1) + intersection_size
162
- approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
163
- relative_error = (actual - approximate).abs / Float(actual)
164
- bad_estimates += 1 if relative_error > expected_relative_error * 2
165
- very_bad_estimates += 1 if relative_error > expected_relative_error * 3
166
- end
167
-
168
- bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
169
- very_bad_estimates.should == 0
170
- end
171
-
172
- # There are no good theoretical guarantees that I know of for arbitrary
173
- # intersection estimation, since it's expressed as the sum of unions of
174
- # HyperLogLog counters, but it tends to work okay in practice, as seen below.
175
-
176
- it "produces decent estimates for intersections" do
177
- b, max_items = 6, 1000
178
- counter = HyperLogLog.new(Redis.new, b)
179
- expected_relative_error = 1.04 / Math.sqrt(2 ** b)
180
-
181
- max_items.times do |i|
182
- value1 = Digest::MD5.hexdigest("first-value#{i}")
183
- value2 = Digest::MD5.hexdigest("second-value#{i}")
184
- value3 = Digest::MD5.hexdigest("third-value#{i}")
185
- value4 = Digest::MD5.hexdigest("fourth-value#{i}")
186
- counter.add("mycounter1", value1)
187
- counter.add("mycounter2", value2)
188
- counter.add("mycounter3", value3)
189
- counter.add("mycounter4", value4)
190
- [value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
191
- end
192
-
193
- small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
174
+ # There are no good theoretical guarantees that I know of for arbitrary
175
+ # intersection estimation, since it's expressed as the sum of unions of
176
+ # HyperLogLog counters, but it tends to work okay in practice, as seen below.
194
177
 
195
- small_counters.each do |counter_name|
196
- intersection_estimate = counter.intersection(counter_name, 'mycounter5')
197
- intersection_estimate.should > 0
198
- (intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
199
- end
200
-
201
- [2,3].each do |intersection_size|
202
- small_counters.combination(intersection_size).each do |counter_names|
203
- intersection_estimate = counter.intersection(*counter_names)
204
- intersection_estimate.should >= 0
205
- intersection_estimate.should < intersection_size * max_items * expected_relative_error
178
+ it "produces decent estimates for intersections" do
179
+ b, max_items = 6, 1000
180
+ counter = counter_type.new(Redis.new, b)
181
+ expected_relative_error = 1.04 / Math.sqrt(2 ** b)
182
+
183
+ max_items.times do |i|
184
+ value1 = Digest::MD5.hexdigest("first-value#{i}")
185
+ value2 = Digest::MD5.hexdigest("second-value#{i}")
186
+ value3 = Digest::MD5.hexdigest("third-value#{i}")
187
+ value4 = Digest::MD5.hexdigest("fourth-value#{i}")
188
+ counter.add("mycounter1", value1)
189
+ counter.add("mycounter2", value2)
190
+ counter.add("mycounter3", value3)
191
+ counter.add("mycounter4", value4)
192
+ [value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
206
193
  end
207
- end
208
-
209
- 100.times do |i|
210
- value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
211
- small_counters.each { |counter_name| counter.add(counter_name, value) }
212
- end
213
-
214
- [2,3,4].each do |intersection_size|
215
- small_counters.combination(intersection_size).each do |counter_names|
216
- intersection_estimate = counter.intersection(*counter_names)
217
- intersection_estimate.should >= 0
218
- (intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
194
+
195
+ small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
196
+
197
+ small_counters.each do |counter_name|
198
+ intersection_estimate = counter.intersection([counter_name, 'mycounter5'])
199
+ intersection_estimate.should > 0
200
+ (intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
201
+ end
202
+
203
+ [2,3].each do |intersection_size|
204
+ small_counters.combination(intersection_size).each do |counter_names|
205
+ intersection_estimate = counter.intersection(counter_names)
206
+ intersection_estimate.should >= 0
207
+ intersection_estimate.should < intersection_size * max_items * expected_relative_error
208
+ end
219
209
  end
210
+
211
+ 100.times do |i|
212
+ value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
213
+ small_counters.each { |counter_name| counter.add(counter_name, value) }
214
+ end
215
+
216
+ [2,3,4].each do |intersection_size|
217
+ small_counters.combination(intersection_size).each do |counter_names|
218
+ intersection_estimate = counter.intersection(counter_names)
219
+ intersection_estimate.should >= 0
220
+ (intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
221
+ end
222
+ end
223
+
220
224
  end
221
-
222
225
  end
223
-
224
226
  end
@@ -0,0 +1,216 @@
1
+ require 'securerandom'
2
+ require 'timecop'
3
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
+
5
+ MINUTES=60
6
+ HOURS=MINUTES*60
7
+ DAYS=HOURS*24
8
+ WEEKS=DAYS*7
9
+
10
+ describe HyperLogLog::TimeSeriesCounter do
11
+
12
+ before(:each) do
13
+ @b = 11
14
+ @redis = Redis.new
15
+ @counter = HyperLogLog::TimeSeriesCounter.new(@redis, @b)
16
+ @expected_relative_error = 1.04 / Math.sqrt(2 ** @b)
17
+
18
+ def counter_should_equal(counter_val, expected_val, relative_error_base=nil)
19
+ (counter_val - expected_val).abs.should <= (relative_error_base || expected_val) * @expected_relative_error
20
+ end
21
+ end
22
+
23
+ it "can estimate cardinalities from any particular point in time until the present" do
24
+ Timecop.travel(Time.now - 2 * WEEKS) do
25
+ (0..100).each { |i| @counter.add('mycounter', "item#{i}") }
26
+ end
27
+ Timecop.travel(Time.now - 1 * WEEKS) do
28
+ (100..200).each { |i| @counter.add('mycounter', "item#{i}") }
29
+ end
30
+ Timecop.travel(Time.now - 6 * DAYS) do
31
+ (0..100).each { |i| @counter.add('mycounter', "item#{i}") }
32
+ end
33
+ Timecop.travel(Time.now - 5 * DAYS) do
34
+ (100..200).each { |i| @counter.add('mycounter', "item#{i}") }
35
+ end
36
+ Timecop.travel(Time.now - 4 * DAYS) do
37
+ (200..250).each { |i| @counter.add('mycounter', "item#{i}") }
38
+ end
39
+
40
+ counter_should_equal(@counter.count('mycounter'), 250)
41
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * WEEKS), 250)
42
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
43
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS), 250)
44
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
45
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
46
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 0, 250)
47
+ end
48
+
49
+ it "can estimate unions from any particular point in time until the present" do
50
+ Timecop.travel(Time.now - 2 * WEEKS) do
51
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
52
+ end
53
+ Timecop.travel(Time.now - 1 * WEEKS) do
54
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
55
+ end
56
+ Timecop.travel(Time.now - 6 * DAYS) do
57
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
58
+ end
59
+ Timecop.travel(Time.now - 5 * DAYS) do
60
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
61
+ end
62
+ Timecop.travel(Time.now - 4 * DAYS) do
63
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
64
+ end
65
+
66
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2']), 250)
67
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
68
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
69
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS), 250)
70
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
71
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
72
+ counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS), 0, 250)
73
+ end
74
+
75
+ it "can estimate intersections from any particular point in time until the present" do
76
+ Timecop.travel(Time.now - 2 * WEEKS) do
77
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
78
+ end
79
+ Timecop.travel(Time.now - 1 * WEEKS) do
80
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
81
+ end
82
+ Timecop.travel(Time.now - 6 * DAYS) do
83
+ (0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
84
+ end
85
+ Timecop.travel(Time.now - 5 * DAYS) do
86
+ (100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
87
+ end
88
+ Timecop.travel(Time.now - 4 * DAYS) do
89
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
90
+ end
91
+ Timecop.travel(Time.now - 3 * DAYS) do
92
+ (200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
93
+ end
94
+
95
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2']), 250)
96
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
97
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 150, 250)
98
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 6 * DAYS - 12 * HOURS), 50, 250)
99
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 50, 250)
100
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
101
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS - 12 * HOURS), 0, 250)
102
+ counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 2 * DAYS), 0, 250)
103
+ end
104
+
105
+ it "can use union_store to store snapshots of counters at particular points in time" do
106
+ Timecop.travel(Time.now - 2 * WEEKS) do
107
+ (0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
108
+ end
109
+ Timecop.travel(Time.now - 1 * WEEKS) do
110
+ (100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
111
+ end
112
+ Timecop.travel(Time.now - 6 * DAYS) do
113
+ (0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
114
+ end
115
+ Timecop.travel(Time.now - 5 * DAYS) do
116
+ (100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
117
+ end
118
+ Timecop.travel(Time.now - 4 * DAYS) do
119
+ (200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
120
+ end
121
+ Timecop.travel(Time.now - 3 * DAYS) do
122
+ (200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
123
+ end
124
+
125
+ @counter.union_store('counter1_1_week_ago', ['mycounter1'], Time.now.to_i - 1 * WEEKS)
126
+ @counter.union_store('counter2_5_days_ago', ['mycounter2'], Time.now.to_i - 5 * DAYS)
127
+ counter_should_equal(@counter.union(['counter1_1_week_ago', 'counter2_5_days_ago']), 150, 250)
128
+ end
129
+
130
+ it "allows you to override the time an event is registered when it's added" do
131
+ (0..1000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 3 * WEEKS) }
132
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 2 * WEEKS) }
133
+ (2000..3000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
134
+ (3000..4000).each { |i| @counter.add('mycounter', "item#{i}") }
135
+
136
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
137
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
138
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
139
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
140
+ end
141
+
142
+ it "doesn't screw up more recent counts when items are injected with earlier timestamp overrides" do
143
+ Timecop.travel(Time.now - 3 * WEEKS) do
144
+ (0..1000).each { |i| @counter.add('mycounter', "item#{i}") }
145
+ end
146
+
147
+ Timecop.travel(Time.now - 2 * WEEKS) do
148
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
149
+ end
150
+
151
+ Timecop.travel(Time.now - 1 * WEEKS) do
152
+ (2000..3000).each { |i| @counter.add('mycounter', "item#{i}") }
153
+ end
154
+
155
+ Timecop.travel(Time.now - 2 * DAYS) do
156
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
157
+ end
158
+
159
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
160
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
161
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
162
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
163
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
164
+
165
+ # Shouldn't change counts, since they're updates to counts that happen later
166
+ # than the time we're trying to inject
167
+ (1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
168
+
169
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
170
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
171
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
172
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
173
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
174
+
175
+ # Should change counts, since they're updates to counts for items we've never
176
+ # seen before in the past
177
+ (3000..4000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
178
+
179
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
180
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
181
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 3000)
182
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
183
+ counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
184
+ end
185
+
186
+ it "can compute deltas over time on events correctly" do
187
+ # A larger-scale test that simulates user join events and tests that we can get
188
+ # week-by-week deltas. Generate new user counts according to the following
189
+ # weekly schedule: 55780 during the first week, 300 more during the next week,
190
+ # 10 more the next week, etc.
191
+
192
+ schedule = [55780, 300, 10, 4000, 1000, 1000, 5000, 15000, 30000, 3000]
193
+ schedule.each_with_index do |num_users, i|
194
+ Timecop.travel(Time.now - (schedule.length * WEEKS) + (i * WEEKS)) do
195
+ num_users.times do |i|
196
+ Timecop.travel(Time.now + 2 * HOURS + i) do
197
+ @counter.add("users", "user#{SecureRandom.uuid}")
198
+ end
199
+ end
200
+ end
201
+ end
202
+
203
+ actual_total = schedule.reduce(:+)
204
+ estimated_total = @counter.count("users")
205
+ (actual_total - estimated_total).abs.should < @expected_relative_error * actual_total
206
+
207
+ # Go through the schedule, computing week-by-week deltas and comparing them to the
208
+ # scheduled additions.
209
+
210
+ schedule.each_with_index do |users_joined, i|
211
+ week = schedule.length - 1 - i
212
+ c = @counter.count('users', Time.now.to_i - (week+1) * WEEKS) - @counter.count('users', Time.now.to_i - week * WEEKS)
213
+ (users_joined - c).abs.should < @expected_relative_error * schedule.reduce(:+)
214
+ end
215
+ end
216
+ end