hyperloglog-redis 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/HISTORY.md +25 -6
- data/README.md +124 -25
- data/VERSION +1 -1
- data/hyperloglog-redis.gemspec +10 -4
- data/lib/algorithm.rb +69 -0
- data/lib/counter.rb +40 -0
- data/lib/hyperloglog-redis.rb +3 -1
- data/lib/time_series_counter.rb +80 -0
- data/spec/hyper_log_log_spec.rb +199 -197
- data/spec/time_series_counter_spec.rb +216 -0
- metadata +28 -14
- data/lib/hyper_log_log.rb +0 -96
data/spec/hyper_log_log_spec.rb
CHANGED
@@ -1,224 +1,226 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe HyperLogLog do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
4
|
+
|
5
|
+
[HyperLogLog::Counter, HyperLogLog::TimeSeriesCounter].each do |counter_type|
|
6
|
+
|
7
|
+
it "doesn't change its count when it sees values that it's already seen" do
|
8
|
+
redis = Redis.new
|
9
|
+
counter = counter_type.new(redis, 10)
|
10
|
+
test_set = (1..100).map{ |x| x.to_s }
|
11
|
+
test_set.each{ |value| counter.add("mycounter", value) }
|
12
|
+
original_estimate = counter.count("mycounter")
|
13
|
+
5.times do
|
14
|
+
test_set.each do |value|
|
15
|
+
counter.add("mycounter", value)
|
16
|
+
counter.count("mycounter").should == original_estimate
|
17
|
+
end
|
15
18
|
end
|
16
19
|
end
|
17
|
-
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
21
|
+
it "can maintain more than one logically distinct counter" do
|
22
|
+
redis = Redis.new
|
23
|
+
counter = counter_type.new(redis, 10)
|
24
|
+
other_estimate = counter.count("counter2")
|
25
|
+
(1..100).each do |i|
|
26
|
+
counter.add("counter1", i.to_s)
|
27
|
+
counter.count("counter2").should == other_estimate
|
28
|
+
end
|
29
|
+
other_estimate = counter.count("counter1")
|
30
|
+
(101..200).each do |i|
|
31
|
+
counter.add("counter2", i.to_s)
|
32
|
+
counter.count("counter1").should == other_estimate
|
33
|
+
end
|
34
|
+
other_estimate = counter.count("counter2")
|
35
|
+
(201..300).each do |i|
|
36
|
+
counter.add("counter1", i.to_s)
|
37
|
+
counter.count("counter2").should == other_estimate
|
38
|
+
end
|
39
|
+
counter.count("counter1").should > 100
|
40
|
+
counter.count("counter2").should > 50
|
41
|
+
counter.count("counter1").should > counter.count("counter2")
|
26
42
|
end
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
counter.
|
43
|
+
|
44
|
+
it "can exactly count small sets" do
|
45
|
+
redis = Redis.new
|
46
|
+
counter = counter_type.new(redis, 11)
|
47
|
+
10.times { |i| counter.add("mycounter", i.to_s) }
|
48
|
+
counter.count("mycounter").should == 10
|
31
49
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
counter.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
50
|
+
|
51
|
+
it "can exactly count small unions" do
|
52
|
+
redis = Redis.new
|
53
|
+
counter = counter_type.new(redis, 11)
|
54
|
+
(1..8).each { |i| counter.add("mycounter1", i.to_s) }
|
55
|
+
(5..12).each { |i| counter.add("mycounter2", i.to_s) }
|
56
|
+
counter.union(["mycounter1", "mycounter2"]).should == 12
|
57
|
+
end
|
58
|
+
|
59
|
+
it "can exactly count small intersections" do
|
60
|
+
redis = Redis.new
|
61
|
+
counter = counter_type.new(redis, 11)
|
62
|
+
(1..8).each { |i| counter.add("mycounter1", i.to_s) }
|
63
|
+
(5..12).each { |i| counter.add("mycounter2", i.to_s) }
|
64
|
+
counter.intersection(["mycounter1", "mycounter2"]).should == 4
|
65
|
+
end
|
66
|
+
|
67
|
+
it "can store unions for querying later" do
|
68
|
+
redis = Redis.new
|
69
|
+
counter = counter_type.new(redis, 11)
|
70
|
+
(1..10).each { |i| counter.add("mycounter1", i.to_s) }
|
71
|
+
(5..15).each { |i| counter.add("mycounter2", i.to_s) }
|
72
|
+
(15..25).each { |i| counter.add("mycounter3", i.to_s) }
|
73
|
+
(20..50).each { |i| counter.add("mycounter4", i.to_s) }
|
74
|
+
counter.union_store("aggregate_counter", ["mycounter1", "mycounter2", "mycounter3", "mycounter4"])
|
75
|
+
counter.union(["mycounter1", "mycounter2", "mycounter3", "mycounter4"]).should == counter.count("aggregate_counter")
|
76
|
+
end
|
77
|
+
|
78
|
+
# With parameter b, HyperLogLog should produce estimates that have
|
79
|
+
# relative error of 1.04 / Math.sqrt(2 ** b). Of course, this analysis
|
80
|
+
# is based on assumptions that aren't necessarily true in practice and
|
81
|
+
# the observed relative error will depend on the distribution of data
|
82
|
+
# we receive as well as the interaction of the murmur hash implementation
|
83
|
+
# with that data. Keeping that in mind, the following spec makes sure
|
84
|
+
# that in the process of adding 1000 values to a set, HyperLogLog only
|
85
|
+
# gives bad estimates (more than twice the expected relative error) in
|
86
|
+
# less than 1% of the cases and never gives very bad estimates (more than
|
87
|
+
# three times the expected relative error).
|
88
|
+
#
|
89
|
+
# It's fine to fudge these numbers a little if the implementation changes,
|
90
|
+
# since you can clearly find a different set of values that make this test
|
91
|
+
# fail even without changing the implementation. But it should serve as a
|
92
|
+
# good indication that there aren't any logical errors in the HyperLogLog
|
93
|
+
# implementation, since it exercises all of the cases in HyperLogLog's
|
94
|
+
# count method except for the correction for very large set sizes.
|
95
|
+
|
96
|
+
it "produces acceptable estimates for counts" do
|
97
|
+
max_items = 1000
|
98
|
+
redis = Redis.new
|
99
|
+
(6..16).each do |b|
|
100
|
+
counter = counter_type.new(redis, b)
|
101
|
+
redis.del('mycounter')
|
102
|
+
bad_estimates = 0
|
103
|
+
very_bad_estimates = 0
|
104
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
105
|
+
max_items.times do |i|
|
106
|
+
value = Digest::MD5.hexdigest("value#{i}")
|
107
|
+
counter.add("mycounter", value)
|
108
|
+
actual = i + 1
|
109
|
+
approximate = counter.count("mycounter")
|
110
|
+
relative_error = (actual - approximate).abs / Float(actual)
|
111
|
+
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
112
|
+
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
113
|
+
end
|
114
|
+
bad_estimates.should < max_items / 100.00
|
115
|
+
very_bad_estimates.should == 0
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
it "produces acceptable estimates for unions with few elements in common" do
|
120
|
+
b, max_items = 10, 2000
|
121
|
+
counter = counter_type.new(Redis.new, b)
|
100
122
|
bad_estimates = 0
|
101
123
|
very_bad_estimates = 0
|
102
124
|
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
103
125
|
max_items.times do |i|
|
104
|
-
|
105
|
-
counter.add("
|
106
|
-
|
107
|
-
|
126
|
+
value1 = Digest::MD5.hexdigest("value#{i}")
|
127
|
+
counter.add("mycounter1", value1)
|
128
|
+
value2 = Digest::MD5.hexdigest("value#{i}incounter2")
|
129
|
+
counter.add("mycounter2", value2)
|
130
|
+
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
131
|
+
counter.add("mycounter3", value3)
|
132
|
+
actual = 3 * (i + 1)
|
133
|
+
approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
|
108
134
|
relative_error = (actual - approximate).abs / Float(actual)
|
109
135
|
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
110
136
|
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
111
137
|
end
|
112
|
-
bad_estimates.should < max_items / 100.00
|
138
|
+
bad_estimates.should < (3 * max_items) / 100.00
|
113
139
|
very_bad_estimates.should == 0
|
114
140
|
end
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
129
|
-
counter.add("mycounter3", value3)
|
130
|
-
actual = 3 * (i + 1)
|
131
|
-
approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
|
132
|
-
relative_error = (actual - approximate).abs / Float(actual)
|
133
|
-
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
134
|
-
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
135
|
-
end
|
136
|
-
bad_estimates.should < (3 * max_items) / 100.00
|
137
|
-
very_bad_estimates.should == 0
|
138
|
-
end
|
139
|
-
|
140
|
-
it "produces acceptable estimates for unions with many elements in common" do
|
141
|
-
b, max_items, intersection_size = 10, 1000, 2000
|
142
|
-
counter = HyperLogLog.new(Redis.new, b)
|
143
|
-
bad_estimates = 0
|
144
|
-
very_bad_estimates = 0
|
145
|
-
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
146
|
-
|
147
|
-
intersection_size.times do |i|
|
148
|
-
value = Digest::MD5.hexdigest("test#{i}value")
|
149
|
-
['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
|
150
|
-
counter.add(counter_name, value)
|
141
|
+
|
142
|
+
it "produces acceptable estimates for unions with many elements in common" do
|
143
|
+
b, max_items, intersection_size = 10, 1000, 2000
|
144
|
+
counter = counter_type.new(Redis.new, b)
|
145
|
+
bad_estimates = 0
|
146
|
+
very_bad_estimates = 0
|
147
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
148
|
+
|
149
|
+
intersection_size.times do |i|
|
150
|
+
value = Digest::MD5.hexdigest("test#{i}value")
|
151
|
+
['mycounter1', 'mycounter2', 'mycounter3'].each do |counter_name|
|
152
|
+
counter.add(counter_name, value)
|
153
|
+
end
|
151
154
|
end
|
155
|
+
|
156
|
+
max_items.times do |i|
|
157
|
+
value1 = Digest::MD5.hexdigest("value#{i}")
|
158
|
+
counter.add("mycounter1", value1)
|
159
|
+
value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
|
160
|
+
counter.add("mycounter2", value2)
|
161
|
+
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
162
|
+
counter.add("mycounter3", value3)
|
163
|
+
actual = 3 * (i + 1) + intersection_size
|
164
|
+
approximate = counter.union(["mycounter1", "mycounter2", "mycounter3"])
|
165
|
+
relative_error = (actual - approximate).abs / Float(actual)
|
166
|
+
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
167
|
+
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
168
|
+
end
|
169
|
+
|
170
|
+
bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
|
171
|
+
very_bad_estimates.should == 0
|
152
172
|
end
|
153
173
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
value2 = Digest::MD5.hexdigest("value#{i}isincounter2")
|
158
|
-
counter.add("mycounter2", value2)
|
159
|
-
value3 = Digest::MD5.hexdigest("this is value#{i}")
|
160
|
-
counter.add("mycounter3", value3)
|
161
|
-
actual = 3 * (i + 1) + intersection_size
|
162
|
-
approximate = counter.union("mycounter1", "mycounter2", "mycounter3")
|
163
|
-
relative_error = (actual - approximate).abs / Float(actual)
|
164
|
-
bad_estimates += 1 if relative_error > expected_relative_error * 2
|
165
|
-
very_bad_estimates += 1 if relative_error > expected_relative_error * 3
|
166
|
-
end
|
167
|
-
|
168
|
-
bad_estimates.should < ((3 * max_items) + intersection_size) / 100.00
|
169
|
-
very_bad_estimates.should == 0
|
170
|
-
end
|
171
|
-
|
172
|
-
# There are no good theoretical guarantees that I know of for arbitrary
|
173
|
-
# intersection estimation, since it's expressed as the sum of unions of
|
174
|
-
# HyperLogLog counters, but it tends to work okay in practice, as seen below.
|
175
|
-
|
176
|
-
it "produces decent estimates for intersections" do
|
177
|
-
b, max_items = 6, 1000
|
178
|
-
counter = HyperLogLog.new(Redis.new, b)
|
179
|
-
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
180
|
-
|
181
|
-
max_items.times do |i|
|
182
|
-
value1 = Digest::MD5.hexdigest("first-value#{i}")
|
183
|
-
value2 = Digest::MD5.hexdigest("second-value#{i}")
|
184
|
-
value3 = Digest::MD5.hexdigest("third-value#{i}")
|
185
|
-
value4 = Digest::MD5.hexdigest("fourth-value#{i}")
|
186
|
-
counter.add("mycounter1", value1)
|
187
|
-
counter.add("mycounter2", value2)
|
188
|
-
counter.add("mycounter3", value3)
|
189
|
-
counter.add("mycounter4", value4)
|
190
|
-
[value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
|
191
|
-
end
|
192
|
-
|
193
|
-
small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
|
174
|
+
# There are no good theoretical guarantees that I know of for arbitrary
|
175
|
+
# intersection estimation, since it's expressed as the sum of unions of
|
176
|
+
# HyperLogLog counters, but it tends to work okay in practice, as seen below.
|
194
177
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
178
|
+
it "produces decent estimates for intersections" do
|
179
|
+
b, max_items = 6, 1000
|
180
|
+
counter = counter_type.new(Redis.new, b)
|
181
|
+
expected_relative_error = 1.04 / Math.sqrt(2 ** b)
|
182
|
+
|
183
|
+
max_items.times do |i|
|
184
|
+
value1 = Digest::MD5.hexdigest("first-value#{i}")
|
185
|
+
value2 = Digest::MD5.hexdigest("second-value#{i}")
|
186
|
+
value3 = Digest::MD5.hexdigest("third-value#{i}")
|
187
|
+
value4 = Digest::MD5.hexdigest("fourth-value#{i}")
|
188
|
+
counter.add("mycounter1", value1)
|
189
|
+
counter.add("mycounter2", value2)
|
190
|
+
counter.add("mycounter3", value3)
|
191
|
+
counter.add("mycounter4", value4)
|
192
|
+
[value1, value2, value3, value4].each{ |value| counter.add("mycounter5", value) }
|
206
193
|
end
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
194
|
+
|
195
|
+
small_counters = ['mycounter1', 'mycounter2', 'mycounter3', 'mycounter4']
|
196
|
+
|
197
|
+
small_counters.each do |counter_name|
|
198
|
+
intersection_estimate = counter.intersection([counter_name, 'mycounter5'])
|
199
|
+
intersection_estimate.should > 0
|
200
|
+
(intersection_estimate - counter.count(counter_name)).abs.should < max_items * expected_relative_error
|
201
|
+
end
|
202
|
+
|
203
|
+
[2,3].each do |intersection_size|
|
204
|
+
small_counters.combination(intersection_size).each do |counter_names|
|
205
|
+
intersection_estimate = counter.intersection(counter_names)
|
206
|
+
intersection_estimate.should >= 0
|
207
|
+
intersection_estimate.should < intersection_size * max_items * expected_relative_error
|
208
|
+
end
|
219
209
|
end
|
210
|
+
|
211
|
+
100.times do |i|
|
212
|
+
value = Digest::MD5.hexdigest("somethingintheintersection#{i}")
|
213
|
+
small_counters.each { |counter_name| counter.add(counter_name, value) }
|
214
|
+
end
|
215
|
+
|
216
|
+
[2,3,4].each do |intersection_size|
|
217
|
+
small_counters.combination(intersection_size).each do |counter_names|
|
218
|
+
intersection_estimate = counter.intersection(counter_names)
|
219
|
+
intersection_estimate.should >= 0
|
220
|
+
(intersection_estimate - 100).abs.should < intersection_size * (max_items + 100) * expected_relative_error
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
220
224
|
end
|
221
|
-
|
222
225
|
end
|
223
|
-
|
224
226
|
end
|
@@ -0,0 +1,216 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
require 'timecop'
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
4
|
+
|
5
|
+
MINUTES=60
|
6
|
+
HOURS=MINUTES*60
|
7
|
+
DAYS=HOURS*24
|
8
|
+
WEEKS=DAYS*7
|
9
|
+
|
10
|
+
describe HyperLogLog::TimeSeriesCounter do
|
11
|
+
|
12
|
+
before(:each) do
|
13
|
+
@b = 11
|
14
|
+
@redis = Redis.new
|
15
|
+
@counter = HyperLogLog::TimeSeriesCounter.new(@redis, @b)
|
16
|
+
@expected_relative_error = 1.04 / Math.sqrt(2 ** @b)
|
17
|
+
|
18
|
+
def counter_should_equal(counter_val, expected_val, relative_error_base=nil)
|
19
|
+
(counter_val - expected_val).abs.should <= (relative_error_base || expected_val) * @expected_relative_error
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it "can estimate cardinalities from any particular point in time until the present" do
|
24
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
25
|
+
(0..100).each { |i| @counter.add('mycounter', "item#{i}") }
|
26
|
+
end
|
27
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
28
|
+
(100..200).each { |i| @counter.add('mycounter', "item#{i}") }
|
29
|
+
end
|
30
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
31
|
+
(0..100).each { |i| @counter.add('mycounter', "item#{i}") }
|
32
|
+
end
|
33
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
34
|
+
(100..200).each { |i| @counter.add('mycounter', "item#{i}") }
|
35
|
+
end
|
36
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
37
|
+
(200..250).each { |i| @counter.add('mycounter', "item#{i}") }
|
38
|
+
end
|
39
|
+
|
40
|
+
counter_should_equal(@counter.count('mycounter'), 250)
|
41
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * WEEKS), 250)
|
42
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
|
43
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS), 250)
|
44
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
|
45
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
46
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 0, 250)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "can estimate unions from any particular point in time until the present" do
|
50
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
51
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
52
|
+
end
|
53
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
54
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
55
|
+
end
|
56
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
57
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
58
|
+
end
|
59
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
60
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
61
|
+
end
|
62
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
63
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
64
|
+
end
|
65
|
+
|
66
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2']), 250)
|
67
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
|
68
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 250)
|
69
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS), 250)
|
70
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 150, 250)
|
71
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
72
|
+
counter_should_equal(@counter.union(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS), 0, 250)
|
73
|
+
end
|
74
|
+
|
75
|
+
it "can estimate intersections from any particular point in time until the present" do
|
76
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
77
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
78
|
+
end
|
79
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
80
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
81
|
+
end
|
82
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
83
|
+
(0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
|
84
|
+
end
|
85
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
86
|
+
(100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
|
87
|
+
end
|
88
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
89
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
90
|
+
end
|
91
|
+
Timecop.travel(Time.now - 3 * DAYS) do
|
92
|
+
(200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
|
93
|
+
end
|
94
|
+
|
95
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2']), 250)
|
96
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * WEEKS), 250)
|
97
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 1 * WEEKS - 3 * DAYS), 150, 250)
|
98
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 6 * DAYS - 12 * HOURS), 50, 250)
|
99
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 5 * DAYS - 12 * HOURS), 50, 250)
|
100
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 4 * DAYS - 12 * HOURS), 50, 250)
|
101
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 3 * DAYS - 12 * HOURS), 0, 250)
|
102
|
+
counter_should_equal(@counter.intersection(['mycounter1', 'mycounter2'], Time.now.to_i - 2 * DAYS), 0, 250)
|
103
|
+
end
|
104
|
+
|
105
|
+
it "can use union_store to store snapshots of counters at particular points in time" do
|
106
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
107
|
+
(0..100).each { |i| @counter.add('mycounter1', "item#{i}") }
|
108
|
+
end
|
109
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
110
|
+
(100..200).each { |i| @counter.add('mycounter2', "item#{i}") }
|
111
|
+
end
|
112
|
+
Timecop.travel(Time.now - 6 * DAYS) do
|
113
|
+
(0..100).each { |i| @counter.add('mycounter2', "item#{i}") }
|
114
|
+
end
|
115
|
+
Timecop.travel(Time.now - 5 * DAYS) do
|
116
|
+
(100..200).each { |i| @counter.add('mycounter1', "item#{i}") }
|
117
|
+
end
|
118
|
+
Timecop.travel(Time.now - 4 * DAYS) do
|
119
|
+
(200..250).each { |i| @counter.add('mycounter1', "item#{i}") }
|
120
|
+
end
|
121
|
+
Timecop.travel(Time.now - 3 * DAYS) do
|
122
|
+
(200..250).each { |i| @counter.add('mycounter2', "item#{i}") }
|
123
|
+
end
|
124
|
+
|
125
|
+
@counter.union_store('counter1_1_week_ago', ['mycounter1'], Time.now.to_i - 1 * WEEKS)
|
126
|
+
@counter.union_store('counter2_5_days_ago', ['mycounter2'], Time.now.to_i - 5 * DAYS)
|
127
|
+
counter_should_equal(@counter.union(['counter1_1_week_ago', 'counter2_5_days_ago']), 150, 250)
|
128
|
+
end
|
129
|
+
|
130
|
+
it "allows you to override the time an event is registered when it's added" do
|
131
|
+
(0..1000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 3 * WEEKS) }
|
132
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 2 * WEEKS) }
|
133
|
+
(2000..3000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
134
|
+
(3000..4000).each { |i| @counter.add('mycounter', "item#{i}") }
|
135
|
+
|
136
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
|
137
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
|
138
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
139
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
140
|
+
end
|
141
|
+
|
142
|
+
it "doesn't screw up more recent counts when items are injected with earlier timestamp overrides" do
|
143
|
+
Timecop.travel(Time.now - 3 * WEEKS) do
|
144
|
+
(0..1000).each { |i| @counter.add('mycounter', "item#{i}") }
|
145
|
+
end
|
146
|
+
|
147
|
+
Timecop.travel(Time.now - 2 * WEEKS) do
|
148
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
|
149
|
+
end
|
150
|
+
|
151
|
+
Timecop.travel(Time.now - 1 * WEEKS) do
|
152
|
+
(2000..3000).each { |i| @counter.add('mycounter', "item#{i}") }
|
153
|
+
end
|
154
|
+
|
155
|
+
Timecop.travel(Time.now - 2 * DAYS) do
|
156
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}") }
|
157
|
+
end
|
158
|
+
|
159
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
|
160
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
|
161
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
162
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
163
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
164
|
+
|
165
|
+
# Shouldn't change counts, since they're updates to counts that happen later
|
166
|
+
# than the time we're trying to inject
|
167
|
+
(1000..2000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
168
|
+
|
169
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 3000)
|
170
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 2000)
|
171
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 2000)
|
172
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
173
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
174
|
+
|
175
|
+
# Should change counts, since they're updates to counts for items we've never
|
176
|
+
# seen before in the past
|
177
|
+
(3000..4000).each { |i| @counter.add('mycounter', "item#{i}", Time.now.to_i - 1 * WEEKS) }
|
178
|
+
|
179
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 4 * WEEKS), 4000)
|
180
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 2 * WEEKS - 3 * DAYS), 3000)
|
181
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * WEEKS - 3 * DAYS), 3000)
|
182
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 3 * DAYS), 1000)
|
183
|
+
counter_should_equal(@counter.count('mycounter', Time.now.to_i - 1 * DAYS), 0)
|
184
|
+
end
|
185
|
+
|
186
|
+
it "can compute deltas over time on events correctly" do
|
187
|
+
# A larger-scale test that simulates user join events and tests that we can get
|
188
|
+
# week-by-week deltas. Generate new user counts according to the following
|
189
|
+
# weekly schedule: 55780 during the first week, 300 more during the next week,
|
190
|
+
# 10 more the next week, etc.
|
191
|
+
|
192
|
+
schedule = [55780, 300, 10, 4000, 1000, 1000, 5000, 15000, 30000, 3000]
|
193
|
+
schedule.each_with_index do |num_users, i|
|
194
|
+
Timecop.travel(Time.now - (schedule.length * WEEKS) + (i * WEEKS)) do
|
195
|
+
num_users.times do |i|
|
196
|
+
Timecop.travel(Time.now + 2 * HOURS + i) do
|
197
|
+
@counter.add("users", "user#{SecureRandom.uuid}")
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
actual_total = schedule.reduce(:+)
|
204
|
+
estimated_total = @counter.count("users")
|
205
|
+
(actual_total - estimated_total).abs.should < @expected_relative_error * actual_total
|
206
|
+
|
207
|
+
# Go through the schedule, computing week-by-week deltas and comparing them to the
|
208
|
+
# scheduled additions.
|
209
|
+
|
210
|
+
schedule.each_with_index do |users_joined, i|
|
211
|
+
week = schedule.length - 1 - i
|
212
|
+
c = @counter.count('users', Time.now.to_i - (week+1) * WEEKS) - @counter.count('users', Time.now.to_i - week * WEEKS)
|
213
|
+
(users_joined - c).abs.should < @expected_relative_error * schedule.reduce(:+)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|