hyperloglog-redis 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/HISTORY.md +25 -6
- data/README.md +124 -25
- data/VERSION +1 -1
- data/hyperloglog-redis.gemspec +10 -4
- data/lib/algorithm.rb +69 -0
- data/lib/counter.rb +40 -0
- data/lib/hyperloglog-redis.rb +3 -1
- data/lib/time_series_counter.rb +80 -0
- data/spec/hyper_log_log_spec.rb +199 -197
- data/spec/time_series_counter_spec.rb +216 -0
- metadata +28 -14
- data/lib/hyper_log_log.rb +0 -96
data/spec/hyper_log_log_spec.rb
CHANGED
@@ -1,224 +1,226 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe HyperLogLog do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
4
|
+
|
5
|
+
[HyperLogLog::Counter, HyperLogLog::TimeSeriesCounter].each do |counter_type|
|
6
|
+
|
7
|
+
it "doesn't change its count when it sees values that it's already seen" do
|
8
|
+
redis = Redis.new
|
9
|
+
counter = counter_type.new(redis, 10)
|
10
|
+
test_set = (1..100).map{ |x| x.to_s }
|
11
|
+
test_set.each{ |value| counter.add("mycounter", value) }
|
12
|
+
original_estimate = counter.count("mycounter")
|
13
|
+
5.times do
|
14
|
+
test_set.each do |value|
|
15
|
+
counter.add("mycounter", value)
|
16
|
+
counter.count("mycounter").should == original_estimate
|
17
|
+
end
|
15
18
|
end
|
16
19
|
end
|
17
|
-
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
21
|
+
it "can maintain more than one logically distinct counter" do
|
22
|
+
redis = Redis.new
|
23
|
+
counter = counter_type.new(redis, 10)
|
24
|
+
other_estimate = counter.count("counter2")
|
25
|
+
(1..100).each do |i|
|
26
|
+
counter.add("counter1", i.to_s)
|
27
|
+
counter.count("counter2").should == other_estimate
|
28
|
+
end
|
29
|
+
other_estimate = counter.count("counter1")
|
30
|
+
(101..200).each do |i|
|
31
|
+
counter.add("counter2", i.to_s)
|
32
|
+
counter.count("counter1").should == other_estimate
|
33
|
+
end
|
34
|
+
other_estimate = counter.count("counter2")
|
35
|
+
(201..300).each do |i|
|
36
|
+
counter.add("counter1", i.to_s)
|
37
|
+
counter.count("counter2").should == other_estimate
|
38
|
+
end
|
39
|
+
counter.count("counter1").should > 100
|
40
|
+
counter.count("counter2").should > 50
|
41
|
+
counter.count("counter1").should > counter.count("counter2")
|
26
42
|
end
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
counter.
|
43
|
+
|
44
|
+
it "can exactly count small sets" do
|
45
|
+
redis = Redis.new
|
46
|
+
counter = counter_type.new(redis, 11)
|
47
|
+
10.times { |i| counter.add("mycounter", i.to_s) }
|
48
|
+
counter.count("mycounter").should == 10
|
31
49
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
counter.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
50
|
+
|
51
|
+
it "can exactly count small unions" do
|
52
|
+
redis = Redis.new
|
53
|
+
counter = counter_type.new(redis, 11)
|
54
|
+
(1..8).each { |i| counter.add("mycounter1", i.to_s) }
|
55
|
+
(5..12).each { |i| counter.add("mycounter2", i.to_s) }
|
56
|
+
counter.union(["mycounter1", "mycounter2"]).should == 12
|
57
|
+
end
|
58
|
+
|
59
|
+
it "can exactly count small intersections" do
|
60
|
+
redis = Redis.new
|
61
|
+
counter = counter_type.new(redis, 11)
|
62
|
+
(1..8).each { |i| counter.add("mycounter1", i.to_s) }
|
63
|
+
(5..12).each { |i| counter.add("mycounter2", i.to_s) }
|
64
|
+
counter.intersection(["mycounter1", "mycounter2"]).should == 4
|
65
|
+
end
|
66
|
+
|
67
|
+
it "can store unions for querying later" do
|
68
|
+
redis = Redis.new
|
69
|
+
counter = counter_type.new(redis, 11)
|
70
|
+
(1..10).each { |i| counter.add("mycounter1", i.to_s) }
|
71
|
+
(5..15).each { |i| counter.add("mycounter2", i.to_s) }
|
72
|
+
(15..25).each { |i| counter.add("mycounter3", i.to_s) }
|
73
|
+
(20..50).each { |i| counter.add("mycounter4", i.to_s) }
|
74
|
+
counter.union_store("aggregate_counter", ["mycounter1", "mycounter2", "mycounter3", "mycounter4"])
|
75
|
+
counter.union(["mycounter1", "mycounter2", "mycounter3", "mycounter4"]).should == counter.count("aggregate_counter")
|
76
|
+
end
|
77
|
+
|
78
|
+
# With parameter b, HyperLogLog should produce estimates that have
|
79
|
+
# relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
|
80
|
+
# is based on assumptions that aren't necessarily true in practice and
|
81
|
+
# the observed relative error will depend on the distribution of data
|
82
|
+
# we receive as well as the interaction of the murmur hash implementation
|
83
|
+
# with that data. Keeping that in mind, the following spec makes sure
|
84
|
+
# that in the process of adding 1000 values to a set, HyperLogLog only
|
85
|
+
# gives bad estimates (more than twice the expected relative error) in
|
86
|
+
# less than 1% of the cases and never gives very bad estimates (more than
|
87
|
+
# three times the expected relative error).
|
88
|
+
#
|
89
|
+
# It's fine to fudge these numbers a little if the implementation changes,
|
90
|
+
# since you can clearly find a different set of values that make this test
|
91
|
+
# fail even without changing the implementation. But it should serve as a
|
92
|
+
# good indication that there aren't any logical errors in the HyperLogLog
|
93
|
+
# implementation, since it exercises all of the cases in HyperLogLog's
|
94
|
+
# count method except for the correction for very large set sizes.
|
95
|
+
|
96
|
+
it "produces acceptable estimates for counts" do
|
97
|
+
max_items = 1000
|
98
|
+
redis = Redis.new
|
99
|
+
(6..16).each do |b|
|
100
|
+
counter = counter_type.new(redis, b)
|
101
|
+
redis.del('mycounter')
|
102
|
+
bad_estimates = 0
|
103
|
+
very_bad_estimates = 0
|
104
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
105
|
+
max_items.times do |i|
|
106
|
+
value = Digest::MD5.hexdigest("value#{i}")
|
107
|
+
counter.add("mycounter", value)
|
108
|
+
actual = i + 1
|
109
|
+
approximate = counter.count("mycounter")
|
110
|
+
relative_error = (actual - approximate).abs / Float(actual)
|
111
|
+
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
112
|
+
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
113
|
+
end
|
114
|
+
bad_estimates.should < max_items / 100.00
|
115
|
+
very_bad_estimates.should == 0
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "produces acceptable estimates for unions with few elements in common" do
|
120
|
+
b, max_items = 10, 2000
|
121
|
+
counter = counter_type.new(Redis.new, b)
|
100
122
|
bad_estimates = 0
|
101
123
|
very_bad_estimates = 0
|
102
124
|
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
103
125
|
max_items.times do |i|
|
104
|
-
|
105
|
-
counter.add("
|
106
|
-
|
107
|
-
|
126
|
+
value1 = Digest::MD5.hexdigest("value#{i}")
|
127
|
+
counter.add("mycounter1", value1)
|
128
|
+
value2 = Digest::MD5.hexdigest("value#{i}incounter2")
|
129
|
+
counter.add("mycounter2", value2)
|
130
|
+
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
131
|
+
counter.add("mycounter3", value3)
|
132
|
+
actual = 3 * (i + 1)
|
133
|
+
approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
|
108
134
|
relative_error = (actual - approximate).abs / Float(actual)
|
109
135
|
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
110
136
|
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
111
137
|
end
|
112
|
-
bad_estimates.should < max_items / 100.00
|
138
|
+
bad_estimates.should < (3 * max_items) / 100.00
|
113
139
|
very_bad_estimates.should == 0
|
114
140
|
end
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
129
|
-
counter.add("mycounter3", value3)
|
130
|
-
actual = 3 * (i + 1)
|
131
|
-
approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
|
132
|
-
relative_error = (actual - approximate).abs / Float(actual)
|
133
|
-
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
134
|
-
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
135
|
-
end
|
136
|
-
bad_estimates.should < (3 * max_items) / 100.00
|
137
|
-
very_bad_estimates.should == 0
|
138
|
-
end
|
139
|
-
|
140
|
-
it "produces acceptable estimates for unions with many elements in common" do
|
141
|
-
b, max_items, intersection_size = 10, 1000, 2000
|
142
|
-
counter = HyperLogLog.new(Redis.new, b)
|
143
|
-
bad_estimates = 0
|
144
|
-
very_bad_estimates = 0
|
145
|
-
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
146
|
-
|
147
|
-
intersection_size.times do |i|
|
148
|
-
value = Digest::MD5.hexdigest("test#{i}value")
|
149
|
-
['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
|
150
|
-
counter.add(counter_name, value)
|
141
|
+
|
142
|
+
it "produces acceptable estimates for unions with many elements in common" do
|
143
|
+
b, max_items, intersection_size = 10, 1000, 2000
|
144
|
+
counter = counter_type.new(Redis.new, b)
|
145
|
+
bad_estimates = 0
|
146
|
+
very_bad_estimates = 0
|
147
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
148
|
+
|
149
|
+
intersection_size.times do |i|
|
150
|
+
value = Digest::MD5.hexdigest("test#{i}value")
|
151
|
+
['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
|
152
|
+
counter.add(counter_name, value)
|
153
|
+
end
|
151
154
|
end
|
155
|
+
|
156
|
+
max_items.times do |i|
|
157
|
+
value1 = Digest::MD5.hexdigest("value#{i}")
|
158
|
+
counter.add("mycounter1", value1)
|
159
|
+
value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
|
160
|
+
counter.add("mycounter2", value2)
|
161
|
+
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
162
|
+
counter.add("mycounter3", value3)
|
163
|
+
actual = 3 * (i + 1) + intersection_size
|
164
|
+
approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
|
165
|
+
relative_error = (actual - approximate).abs / Float(actual)
|
166
|
+
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
167
|
+
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
168
|
+
end
|
169
|
+
|
170
|
+
bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
|
171
|
+
very_bad_estimates.should == 0
|
152
172
|
end
|
153
173
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
|
158
|
-
counter.add("mycounter2", value2)
|
159
|
-
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
160
|
-
counter.add("mycounter3", value3)
|
161
|
-
actual = 3 * (i + 1) + intersection_size
|
162
|
-
approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
|
163
|
-
relative_error = (actual - approximate).abs / Float(actual)
|
164
|
-
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
165
|
-
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
166
|
-
end
|
167
|
-
|
168
|
-
bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
|
169
|
-
very_bad_estimates.should == 0
|
170
|
-
end
|
171
|
-
|
172
|
-
# There are no good theoretical guarantees that I know of for arbitrary
|
173
|
-
# intersection estimation, since it's expressed as the sum of unions of
|
174
|
-
# HyperLogLog counters, but it tends to work okay in practice, as seen below.
|
175
|
-
|
176
|
-
it "produces decent estimates for intersections" do
|
177
|
-
b, max_items = 6, 1000
|
178
|
-
counter = HyperLogLog.new(Redis.new, b)
|
179
|
-
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
180
|
-
|
181
|
-
max_items.times do |i|
|
182
|
-
value1 = Digest::MD5.hexdigest("first-value#{i}")
|
183
|
-
value2 = Digest::MD5.hexdigest("second-value#{i}")
|
184
|
-
value3 = Digest::MD5.hexdigest("third-value#{i}")
|
185
|
-
value4 = Digest::MD5.hexdigest("fourth-value#{i}")
|
186
|
-
counter.add("mycounter1", value1)
|
187
|
-
counter.add("mycounter2", value2)
|
188
|
-
counter.add("mycounter3", value3)
|
189
|
-
counter.add("mycounter4", value4)
|
190
|
-
[value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
|
191
|
-
end
|
192
|
-
|
193
|
-
small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
|
174
|
+
# There are no good theoretical guarantees that I know of for arbitrary
|
175
|
+
# intersection estimation, since it's expressed as the sum of unions of
|
176
|
+
# HyperLogLog counters, but it tends to work okay in practice, as seen below.
|
194
177
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
178
|
+
it "produces decent estimates for intersections" do
|
179
|
+
b, max_items = 6, 1000
|
180
|
+
counter = counter_type.new(Redis.new, b)
|
181
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
182
|
+
|
183
|
+
max_items.times do |i|
|
184
|
+
value1 = Digest::MD5.hexdigest("first-value#{i}")
|
185
|
+
value2 = Digest::MD5.hexdigest("second-value#{i}")
|
186
|
+
value3 = Digest::MD5.hexdigest("third-value#{i}")
|
187
|
+
value4 = Digest::MD5.hexdigest("fourth-value#{i}")
|
188
|
+
counter.add("mycounter1", value1)
|
189
|
+
counter.add("mycounter2", value2)
|
190
|
+
counter.add("mycounter3", value3)
|
191
|
+
counter.add("mycounter4", value4)
|
192
|
+
[value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
|
206
193
|
end
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
194
|
+
|
195
|
+
small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
|
196
|
+
|
197
|
+
small_counters.each do |counter_name|
|
198
|
+
intersection_estimate = counter.intersection([counter_name, 'mycounter5'])
|
199
|
+
intersection_estimate.should > 0
|
200
|
+
(intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
|
201
|
+
end
|
202
|
+
|
203
|
+
[2,3].each do |intersection_size|
|
204
|
+
small_counters.combination(intersection_size).each do |counter_names|
|
205
|
+
intersection_estimate = counter.intersection(counter_names)
|
206
|
+
intersection_estimate.should >= 0
|
207
|
+
intersection_estimate.should < intersection_size * max_items * expected_relative_error
|
208
|
+
end
|
219
209
|
end
|
210
|
+
|
211
|
+
100.times do |i|
|
212
|
+
value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
|
213
|
+
small_counters.each { |counter_name| counter.add(counter_name, value) }
|
214
|
+
end
|
215
|
+
|
216
|
+
[2,3,4].each do |intersection_size|
|
217
|
+
small_counters.combination(intersection_size).each do |counter_names|
|
218
|
+
intersection_estimate = counter.intersection(counter_names)
|
219
|
+
intersection_estimate.should >= 0
|
220
|
+
(intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
220
224
|
end
|
221
|
-
|
222
225
|
end
|
223
|
-
|
224
226
|
end
|
@@ -0,0 +1,216 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
require 'timecop'
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
4
|
+
|
5
|
+
MINUTES=60
|
6
|
+
HOURS=MINUTES*60
|
7
|
+
DAYS=HOURS*24
|
8
|
+
WEEKS=DAYS*7
|
9
|
+
|
10
|
+
describe HyperLogLog::TimeSeriesCounter do
|
11
|
+
|
12
|
+
before(:each) do
|
13
|
+
@b = 11
|
14
|
+
@redis = Redis.new
|
15
|
+
@counter = HyperLogLog::TimeSeriesCounter.new(@redis, @b)
|
16
|
+
@expected_relative_error = 1.04 / Math.sqrt(2 ** @b)
|
17
|
+
|
18
|
+
def counter_should_equal(counter_val, expected_val, relative_error_base=nil)
|
19
|
+
(counter_val - expected_val).abs.should <= (relative_error_base || expected_val) * @expected_relative_error
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it "can estimate cardinalities from any particular point in time until the present" do
|
24
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
25
|
+
(0..100).each { |i| @counter.add('mycounter', "item#{i}") }
|
26
|
+
end
|
27
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
28
|
+
(100..200).each { |i| @counter.add('mycounter', "item#{i}") }
|
29
|
+
end
|
30
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
31
|
+
(0..100).each { |i| @counter.add('mycounter', "item#{i}") }
|
32
|
+
end
|
33
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
34
|
+
(100..200).each { |i| @counter.add('mycounter', "item#{i}") }
|
35
|
+
end
|
36
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
37
|
+
(200..250).each { |i| @counter.add('mycounter', "item#{i}") }
|
38
|
+
end
|
39
|
+
|
40
|
+
counter_should_equal(@counter.count('mycounter'), 250)
|
41
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * WEEKS), 250)
|
42
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
|
43
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS), 250)
|
44
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
|
45
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
46
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 0, 250)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "can estimate unions from any particular point in time until the present" do
|
50
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
51
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
52
|
+
end
|
53
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
54
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
55
|
+
end
|
56
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
57
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
58
|
+
end
|
59
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
60
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
61
|
+
end
|
62
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
63
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
64
|
+
end
|
65
|
+
|
66
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2']), 250)
|
67
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
|
68
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
|
69
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS), 250)
|
70
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
|
71
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
72
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS), 0, 250)
|
73
|
+
end
|
74
|
+
|
75
|
+
it "can estimate intersections from any particular point in time until the present" do
|
76
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
77
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
78
|
+
end
|
79
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
80
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
81
|
+
end
|
82
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
83
|
+
(0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
|
84
|
+
end
|
85
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
86
|
+
(100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
|
87
|
+
end
|
88
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
89
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
90
|
+
end
|
91
|
+
Timecop.travel(Time.now - 3 * DAYS) do
|
92
|
+
(200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
|
93
|
+
end
|
94
|
+
|
95
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2']), 250)
|
96
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
|
97
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 150, 250)
|
98
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 6 * DAYS - 12 * HOURS), 50, 250)
|
99
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 50, 250)
|
100
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
101
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS - 12 * HOURS), 0, 250)
|
102
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 2 * DAYS), 0, 250)
|
103
|
+
end
|
104
|
+
|
105
|
+
it "can use union_store to store snapshots of counters at particular points in time" do
|
106
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
107
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
108
|
+
end
|
109
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
110
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
111
|
+
end
|
112
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
113
|
+
(0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
|
114
|
+
end
|
115
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
116
|
+
(100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
|
117
|
+
end
|
118
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
119
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
120
|
+
end
|
121
|
+
Timecop.travel(Time.now - 3 * DAYS) do
|
122
|
+
(200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
|
123
|
+
end
|
124
|
+
|
125
|
+
@counter.union_store('counter1_1_week_ago', ['mycounter1'], Time.now.to_i - 1 * WEEKS)
|
126
|
+
@counter.union_store('counter2_5_days_ago', ['mycounter2'], Time.now.to_i - 5 * DAYS)
|
127
|
+
counter_should_equal(@counter.union(['counter1_1_week_ago', 'counter2_5_days_ago']), 150, 250)
|
128
|
+
end
|
129
|
+
|
130
|
+
it "allows you to override the time an event is registered when it's added" do
|
131
|
+
(0..1000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 3 * WEEKS) }
|
132
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 2 * WEEKS) }
|
133
|
+
(2000..3000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
134
|
+
(3000..4000).each { |i| @counter.add('mycounter', "item#{i}") }
|
135
|
+
|
136
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
|
137
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
|
138
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
139
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
140
|
+
end
|
141
|
+
|
142
|
+
it "doesn't screw up more recent counts when items are injected with earlier timestamp overrides" do
|
143
|
+
Timecop.travel(Time.now - 3 * WEEKS) do
|
144
|
+
(0..1000).each { |i| @counter.add('mycounter', "item#{i}") }
|
145
|
+
end
|
146
|
+
|
147
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
148
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
|
149
|
+
end
|
150
|
+
|
151
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
152
|
+
(2000..3000).each { |i| @counter.add('mycounter', "item#{i}") }
|
153
|
+
end
|
154
|
+
|
155
|
+
Timecop.travel(Time.now - 2 * DAYS) do
|
156
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
|
157
|
+
end
|
158
|
+
|
159
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
|
160
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
|
161
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
162
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
163
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
164
|
+
|
165
|
+
# Shouldn't change counts, since they're updates to counts that happen later
|
166
|
+
# than the time we're trying to inject
|
167
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
168
|
+
|
169
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
|
170
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
|
171
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
172
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
173
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
174
|
+
|
175
|
+
# Should change counts, since they're updates to counts for items we've never
|
176
|
+
# seen before in the past
|
177
|
+
(3000..4000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
178
|
+
|
179
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
|
180
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
|
181
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 3000)
|
182
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
183
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
184
|
+
end
|
185
|
+
|
186
|
+
it "can compute deltas over time on events correctly" do
|
187
|
+
# A larger-scale test that simulates user join events and tests that we can get
|
188
|
+
# week-by-week deltas. Generate new user counts according to the following
|
189
|
+
# weekly schedule: 55780 during the first week, 300 more during the next week,
|
190
|
+
# 10 more the next week, etc.
|
191
|
+
|
192
|
+
schedule = [55780, 300, 10, 4000, 1000, 1000, 5000, 15000, 30000, 3000]
|
193
|
+
schedule.each_with_index do |num_users, i|
|
194
|
+
Timecop.travel(Time.now - (schedule.length * WEEKS) + (i * WEEKS)) do
|
195
|
+
num_users.times do |i|
|
196
|
+
Timecop.travel(Time.now + 2 * HOURS + i) do
|
197
|
+
@counter.add("users", "user#{SecureRandom.uuid}")
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
actual_total = schedule.reduce(:+)
|
204
|
+
estimated_total = @counter.count("users")
|
205
|
+
(actual_total - estimated_total).abs.should < @expected_relative_error * actual_total
|
206
|
+
|
207
|
+
# Go through the schedule, computing week-by-week deltas and comparing them to the
|
208
|
+
# scheduled additions.
|
209
|
+
|
210
|
+
schedule.each_with_index do |users_joined, i|
|
211
|
+
week = schedule.length - 1 - i
|
212
|
+
c = @counter.count('users', Time.now.to_i - (week+1) * WEEKS) - @counter.count('users', Time.now.to_i - week * WEEKS)
|
213
|
+
(users_joined - c).abs.should < @expected_relative_error * schedule.reduce(:+)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|