metricstore 0.0.1 → 0.1.0

@@ -1,16 +1,70 @@
+ require 'couchbase'
+
  module Metricstore
+   # Internal class. Use this class outside the gem at your own risk.
    class CouchbaseClient
-     include BaseClient

      def initialize(*args, &callback)
        super
+       @connection = Couchbase.connect(*args, &callback)
+     end
+
+     # key: a string
+     # delta: an integer
+     # options:
+     #   :ttl => Time-to-live (number of seconds from now).
+     # returns: [value, cas_version_id]
+     def increment(key, delta, opts={})
+       options = {:initial => delta, :extended => true}
+       options.merge!(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
+       value, flags, cas = connection.incr(key, delta, options)
+       [value, cas]
+     end
+
+     # key: a string
+     # value: a marshalable object
+     # options:
+     #   :ttl => Time-to-live (number of seconds from now).
+     # returns: cas_version_id, or nil if the key already exists.
+     def add(key, value, opts={})
+       options = {}
+       options.merge!(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
+       connection.add(key, value, options)
+     rescue Couchbase::Error::KeyExists => e
+       nil
+     end

-       require 'couchbase'
-       @couchbase = Couchbase.connect(*args, &callback)
+     # key: a string
+     # value: a marshalable object
+     # options:
+     #   :ttl => Time-to-live (number of seconds from now).
+     #   :cas => a version id (for optimistic concurrency control)
+     # returns: cas_version_id, or nil if the write failed (e.g. the :cas check failed).
+     def set(key, value, opts={})
+       options = {}
+       options.merge!(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
+       options.merge!(:cas => opts[:cas]) if opts.include?(:cas)
+       connection.set(key, value, options)
+     rescue Couchbase::Error::KeyExists => e
+       nil
+     end
+
+     # key: a string
+     # returns: [value, cas_version_id], or nil if the key doesn't exist.
+     def fetch(key, opts={})
+       options = {:extended => true, :quiet => true}
+       options.merge!(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
+       value, flags, cas = connection.get(key, options)
+       value.nil? ? nil : [value, cas]
      end

      private

-     attr_reader :couchbase
+     attr_reader :connection
+
+     def convert_ttl(ttl)
+       ttl.nil? ? nil : ttl.to_f <= 0 ? nil : (Time.now + ttl.to_f).to_f
+     end
+
    end
  end
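
For orientation, a minimal usage sketch of the new client. The require path and key names are illustrative, and with no arguments the underlying couchbase gem connects to its default bucket on localhost; note the class is documented as internal to the gem.

require 'metricstore'  # assumed require path for the gem

client = Metricstore::CouchbaseClient.new  # default local Couchbase bucket

value, cas = client.increment("hits", 1, :ttl => 3600)  # => [1, <cas id>]
client.add("first_seen", Time.now.to_s)                 # => cas id, or nil if the key already exists
stored, cas = client.fetch("first_seen")                # => [value, cas id], or nil if missing
client.set("first_seen", stored, :cas => cas)           # => cas id, or nil if the :cas check fails
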
@@ -0,0 +1,23 @@
+ module Metricstore
+   class CountIncrementer < Updater
+
+     def increment(key, delta, ttl=nil)
+       return if delta.zero?
+       update(key, delta, ttl)
+     end
+
+     protected
+
+     def prepare_data(delta)
+       delta
+     end
+
+     def consolidate_data(delta1, delta2)
+       delta1 + delta2
+     end
+
+     def handle_update(key, delta, ttl, errors)
+       kvstore.increment(key, delta, :ttl => ttl)
+     end
+   end
+ end
@@ -0,0 +1,4 @@
+ module Metricstore
+   class DataLossError < RuntimeError
+   end
+ end
@@ -0,0 +1,91 @@
+ module Metricstore
+   module HyperLogLog
+
+     HASH_BIT_SIZE = 512
+
+     ALPHA = {}
+     ALPHA[16] = 0.673 # m = 2**4
+     ALPHA[32] = 0.697 # m = 2**5
+     ALPHA[64] = 0.709 # m = 2**6
+     (7 .. 16).each do |b|
+       m = 2 ** b
+       ALPHA[m] = 0.7213/(1 + 1.1079/m)
+     end
+
+     class Builder
+       require 'digest/sha2'
+
+       attr_reader :bucket_count
+
+       # bucket_updater must have a method named "call" which takes two arguments:
+       # the bucket index, and an integer value (of which it will track the max
+       # value per bucket).
+       def initialize(error_rate, bucket_updater)
+         @error_rate = error_rate
+         unless @error_rate > 0 && @error_rate < 1
+           raise(ArgumentError, "error_rate must be between 0 and 1")
+         end
+         @bits = HyperLogLog.bits_needed(error_rate)
+         unless (@bits + 10) <= HASH_BIT_SIZE
+           raise(ArgumentError, "error_rate is unattainable. be less picky.")
+         end
+         @bucket_count = 1 << @bits
+         @alpha = ALPHA[@bucket_count]
+         @bucket_updater = bucket_updater
+       end
+
+       def add(item)
+         hashed = hash_of(item)
+         offset = HASH_BIT_SIZE - @bits
+         mask = ((1 << @bits) - 1) << offset
+         shifted_front_bits = (hashed & mask)
+         front_bits = shifted_front_bits >> offset
+         back_bits = hashed - shifted_front_bits
+         bucket_index = front_bits
+         raise("BUG!") if bucket_index >= @bucket_count
+         next_on_bit_index = (HASH_BIT_SIZE - @bits).times.find{|i| back_bits[HASH_BIT_SIZE - @bits - i] == 1}
+         if next_on_bit_index.nil?
+           next_on_bit_index = HASH_BIT_SIZE - @bits
+         else
+           next_on_bit_index += 1
+         end
+         @bucket_updater.call(bucket_index, next_on_bit_index)
+       end
+
+       private
+
+       def hash_of(item)
+         sha = Digest::SHA2.new(HASH_BIT_SIZE)
+         sha << item.to_s
+         sha.to_s.to_i(16)
+       end
+     end
+
+     def self.bits_needed(error_rate)
+       Math.log((1.04 / error_rate) ** 2, 2).round
+     end
+
+     def self.estimate_cardinality(buckets)
+       values = buckets.to_a
+       m = values.size
+       raise("BUG!") unless m > 0
+       alpha = ALPHA[m]
+       raw = alpha * (m ** 2) / values.map{|x| 2 ** -(x || 0)}.inject(:+)
+       if raw <= 2.5 * m
+         # correct for being below ideal range
+         zero_registers = values.count(nil)
+         if zero_registers == 0
+           raw
+         else
+           m * Math.log(m.to_f / zero_registers)
+         end
+       elsif raw <= (2 ** HASH_BIT_SIZE) / 30.0
+         # ideal range
+         raw
+       else
+         # correct for being beyond ideal range
+         (-2 ** HASH_BIT_SIZE) * Math.log(1 - raw.to_f/(2**HASH_BIT_SIZE), 2)
+       end
+     end
+   end
+ end
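
A sketch of how the two halves of the HyperLogLog module fit together: Builder hashes items and pushes per-bucket maxima through the bucket_updater callback, and estimate_cardinality turns the resulting registers back into a count. The plain in-memory array used for the registers here is only a stand-in for whatever store the callback writes to.

# Keep the max value seen per bucket in a local array.
registers = []
updater = lambda do |bucket_index, value|
  registers[bucket_index] = [registers[bucket_index] || 0, value].max
end

# A 5% error rate needs round(log2((1.04 / 0.05)**2)) = 9 index bits,
# i.e. 2**9 = 512 buckets.
builder = Metricstore::HyperLogLog::Builder.new(0.05, updater)

50_000.times { |i| builder.add("user:#{i}") }

# estimate_cardinality expects one slot per bucket; untouched buckets stay nil.
padded = Array.new(builder.bucket_count) { |i| registers[i] }
Metricstore::HyperLogLog.estimate_cardinality(padded)
# => an approximate count of the distinct items added
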
@@ -0,0 +1,41 @@
+ module Metricstore
+   class Incrementer < Updater
+
+     def increment(key, delta, ttl=nil)
+       return if delta.zero?
+       update(key, delta, ttl)
+     end
+
+     protected
+
+     def prepare_data(delta)
+       delta
+     end
+
+     def consolidate_data(delta1, delta2)
+       delta1 + delta2
+     end
+
+     def handle_update(key, delta, ttl, errors)
+       stored_value, cas = kvstore.fetch(key, :ttl => ttl)
+       if stored_value.nil?
+         if kvstore.add(key, delta, :ttl => ttl)
+           return delta
+         else
+           # collision
+           retry_update(key, delta, ttl, errors)
+           return nil
+         end
+       else
+         new_value = stored_value + delta
+         if kvstore.set(key, new_value, :ttl => ttl, :cas => cas)
+           return new_value
+         else
+           # collision
+           retry_update(key, delta, ttl, errors)
+           return nil
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,78 @@
+ require 'set'
+
+ module Metricstore
+   class Inserter < Updater
+
+     def insert(key, values, ttl=nil)
+       update(key, values, ttl)
+     end
+
+     def list_threshold=(threshold)
+       @list_threshold = threshold
+     end
+
+     def list_threshold
+       @list_threshold ||= 100
+     end
+
+     protected
+
+     def prepare_data(values)
+       Set.new(Array(values))
+     end
+
+     def consolidate_data(values1, values2)
+       return 'overflow' if values1 == 'overflow' || values2 == 'overflow'
+       consolidated = values1 + values2
+       return 'overflow' if consolidated.size > list_threshold
+       consolidated
+     end
+
+     # Returns a list of the values that were newly inserted, or else nil
+     # if there was contention, and we have to retry.
+     def handle_update(key, values, ttl, errors)
+       return [] if values.nil? || values.empty?
+       #TODO: there's room here for a local cache optimization
+       list, cas = kvstore.fetch(key, :ttl => ttl)
+       if list.nil?
+         if values == 'overflow' || values.size > list_threshold
+           if kvstore.add(key, 'overflow', :ttl => ttl)
+             return []
+           else
+             # collision
+             retry_update(key, 'overflow', ttl, errors)
+             return nil
+           end
+         elsif kvstore.add(key, values.to_a, :ttl => ttl)
+           return values
+         else
+           # collision
+           retry_update(key, values, ttl, errors)
+           return nil
+         end
+       elsif list == 'overflow'
+         return []
+       else
+         list = Set.new(list)
+         values = values.reject{ |v| list.include?(v) }
+         return [] if values.empty?
+         new_list = values + list.to_a
+         if new_list.size > list_threshold
+           if kvstore.set(key, 'overflow', :cas => cas, :ttl => ttl)
+             return []
+           else
+             # collision
+             retry_update(key, 'overflow', ttl, errors)
+             return nil
+           end
+         elsif kvstore.set(key, new_list, :cas => cas, :ttl => ttl)
+           return values
+         else
+           # collision
+           retry_update(key, values, ttl, errors)
+           return nil
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,39 @@
+ module Metricstore
+   # Internal class. Use this class outside the gem at your own risk.
+   # TTL is ignored. Not thread-safe. For testing purposes only.
+   class MockKeyValueClient
+
+     def initialize(*args, &callback)
+       @store = {}
+     end
+
+     def increment(key, delta, opts={})
+       if @store.include?(key)
+         @store[key] += delta
+       else
+         @store[key] = delta
+       end
+       [@store[key], @store[key]]
+     end
+
+     def add(key, value, opts={})
+       return nil if @store.include?(key)
+       @store[key] = value
+       [value, value]
+     end
+
+     def set(key, value, opts={})
+       return nil if opts[:cas] && opts[:cas] != @store[key]
+       @store[key] = value
+     end
+
+     def fetch(key, opts={})
+       value = @store[key]
+       value.nil? ? nil : [value, value]
+     end
+
+     def to_s
+       "MockKeyValueClient: #{@store.inspect}"
+     end
+   end
+ end
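
Because the mock mirrors the key-value interface the updaters rely on, quick behavioral checks are straightforward (the key name is made up):

store = Metricstore::MockKeyValueClient.new
store.add("counter", 1)         # => [1, 1]
store.add("counter", 5)         # => nil (key already present)
store.increment("counter", 2)   # => [3, 3]
value, cas = store.fetch("counter")
store.set("counter", value + 1, :cas => cas)  # returns nil if the cas is stale
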
@@ -0,0 +1,27 @@
+ module Metricstore
+   module AllCombinations
+     def all_combinations
+       if block_given?
+         a = self.to_a
+         0.upto(size) do |n|
+           a.combination(n) do |c|
+             yield c
+           end
+         end
+       else
+         Enumerator.new do |yielder|
+           a = self.to_a
+           0.upto(size) do |n|
+             a.combination(n) do |c|
+               yielder << c
+             end
+           end
+         end
+       end
+     end
+   end
+ end
+
+ class Array
+   include Metricstore::AllCombinations
+ end
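
all_combinations enumerates every subset of the receiver, from the empty combination up to the full array, either yielding to a block or returning an Enumerator:

[:a, :b, :c].all_combinations.to_a
# => [[], [:a], [:b], [:c], [:a, :b], [:a, :c], [:b, :c], [:a, :b, :c]]

[:a, :b].all_combinations { |combo| puts combo.inspect }
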
@@ -0,0 +1,58 @@
+ module Metricstore
+   class RangeUpdater < Updater
+
+     def update_range(key, value, ttl=nil)
+       raise(ArgumentError, "value must be numeric") unless value.is_a?(Numeric)
+       update(key, [value, value], ttl)
+     end
+
+     protected
+
+     def prepare_data(min_max)
+       min_max
+     end
+
+     def consolidate_data(min_max1, min_max2)
+       [min(min_max1[0], min_max2[0]), max(min_max1[1], min_max2[1])]
+     end
+
+     # Returns nil if there was contention, and we have to retry.
+     # Returns [:new, range] where range is (max - min), if the range was added.
+     # Returns 0 if the stored range already covers the value; otherwise returns [:grew, diff] where diff is the amount the range grew.
+     def handle_update(key, min_max, ttl, errors)
+       #TODO: there's room here for a local cache optimization
+       stored_min_max, cas = kvstore.fetch(key, :ttl => ttl)
+       if stored_min_max.nil?
+         if kvstore.add(key, min_max, :ttl => ttl)
+           return [:new, (min_max[1] - min_max[0])]
+         else
+           # collision
+           retry_update(key, min_max, ttl, errors)
+           return nil
+         end
+       else
+         stored_min, stored_max = stored_min_max
+         new_min = min(stored_min, min_max[0])
+         new_max = max(stored_max, min_max[1])
+         return 0 if new_min == stored_min && new_max == stored_max
+         if kvstore.set(key, [new_min, new_max], :ttl => ttl, :cas => cas)
+           return [:grew, (stored_min - new_min) + (new_max - stored_max)]
+         else
+           # collision
+           retry_update(key, min_max, ttl, errors)
+           return nil
+         end
+       end
+     end
+
+     private
+
+     def min(a, b)
+       a.nil? ? b : b.nil? ? a : (a < b) ? a : b
+     end
+
+     def max(a, b)
+       a.nil? ? b : b.nil? ? a : (a < b) ? b : a
+     end
+   end
+ end
@@ -0,0 +1,192 @@
+ require 'eventmachine'
+
+ module Metricstore
+
+   # Abstract class. Not thread-safe.
+   #
+   # Sub-classes must implement (protected) methods:
+   #
+   #   prepare_data(data)
+   #
+   #   consolidate_data(data1, data2)
+   #
+   #   handle_update(key, data, ttl, errors)
+   #     -> must return a truthy value if and only if the update occurred.
+   #
+   class Updater
+
+     # opts:
+     #   :sleep_interval - sleep cycle length in seconds (default: 0.1).
+     #   :kvstore - the underlying key-value store.
+     #   :max_retry_delay_in_seconds - maximum length of time to wait after an error.
+     #   :max_unhandled_errors - maximum number of retries before handling errors. Set this >= max_healthy_errors.
+     #   :max_healthy_errors - maximum number of retries before healthy? returns false. Set this <= max_unhandled_errors.
+     def initialize(opts={})
+       @sleep_interval = (opts[:sleep_interval] || 0.1).to_f
+       @kvstore = required(opts, :kvstore)
+       @max_retry_delay = required(opts, :max_retry_delay_in_seconds).to_f
+       @max_unhandled_errors = required(opts, :max_unhandled_errors).to_i
+       @max_healthy_errors = required(opts, :max_healthy_errors).to_i
+
+       @timer = nil
+       @running = false
+       @healthy = nil
+       @pending_updates = {}
+     end
+
+     def start!
+       return if @running
+       @running = true
+       EM.next_tick { process! }
+     end
+
+     # Be sure to call this after tests, when you want to let go of the object.
+     def stop!
+       @running = false
+       if timer = @timer
+         EM.cancel_timer(timer)
+         @timer = nil
+       end
+     end
+
+     def healthy?
+       @healthy != false
+     end
+
+     # Approximate length of the queue
+     def backlog
+       @pending_updates.size
+     end
+
+     attr_accessor :handle_update_result
+
+     protected
+
+     attr_reader :kvstore
+
+     def required(opts, parameter_name)
+       opts[parameter_name] || raise("Missing parameter: #{parameter_name}")
+     end
+
+     def retry_update(key, data, ttl=nil, errors=[])
+       update(key, data, ttl, errors)
+     end
+
+     def update(key, data, ttl=nil, errors=[])
+       schedule(errors) do
+         pend_update(key, data, ttl, errors)
+       end
+     end
+
+     def consolidate_data(data1, data2)
+       raise NotImplementedError
+     end
+
+     def prepare_data(data)
+       raise NotImplementedError
+     end
+
+     # Sub-classes may want to over-ride this.
+     def handle_error(e)
+       if defined?(Airbrake) && Airbrake.configuration.environment_name && Airbrake.configuration.public?
+         Airbrake.notify(e)
+       elsif defined?(Exceptional) && Exceptional::Config.should_send_to_api?
+         Exceptional.handle(e)
+       else
+         puts e.inspect
+         raise
+       end
+     end
+
+     private
+
+     attr_reader :sleep_interval
+     attr_reader :max_healthy_errors
+     attr_reader :max_retry_delay
+     attr_reader :max_unhandled_errors
+
+     def schedule(errors=[], &callback)
+       if errors.size == 0
+         EM.next_tick { callback.call }
+       else
+         EM.add_timer(retry_delay_for(errors)) { callback.call }
+       end
+     end
+
+     def retry_delay_for(errors)
+       [2 ** (errors.size - 4), max_retry_delay / 2.0].min * (1 + rand)
+     end
+
+     # This must only be called by the EM reactor thread.
+     def pend_update(key, data, ttl, errors)
+       pending = @pending_updates[key]
+       if pending.nil?
+         @pending_updates[key] = {:data => prepare_data(data), :ttl => ttl, :errors => errors}
+       else
+         pending[:data] = consolidate_data(pending[:data], prepare_data(data))
+         pending[:ttl] = max_ttl(ttl, pending[:ttl])
+         pending[:errors] += errors
+       end
+     rescue => e
+       handle_error(e)
+     end
+
+     def max_ttl(a, b)
+       return 0 if a.nil? || b.nil? || a == 0 || b == 0
+       [a, b].max
+     end
+
+     def reschedule_process!(sleep_first)
+       if @running
+         if timer = @timer
+           EM.cancel_timer(timer)
+         end
+         if sleep_first
+           @timer = EM.add_timer(sleep_interval) { process! }
+         else
+           EM.next_tick { process! }
+         end
+       end
+     end
+
+     def process!
+       @timer = nil
+       processed = 0
+       until @pending_updates.empty? || (processed += 1) > 10
+         key, update = @pending_updates.shift
+         process_update(key, update[:data], update[:ttl], update[:errors] || [])
+       end
+       reschedule_process!(@pending_updates.empty?)
+     rescue => e
+       handle_error(e)
+     end
+
+     def process_update(key, data, ttl, errors)
+       result = handle_update(key, data, ttl, errors)
+       unless result.nil?
+         @healthy = true
+         handle_update_result.call(key, result, ttl) if handle_update_result
+       end
+     rescue => e
+       # Uh oh. We stick the update back in the queue before handling the error.
+       begin
+         errors << e
+         @healthy = false if errors.size > max_healthy_errors
+         if errors.size <= max_unhandled_errors
+           update(key, data, ttl, errors)
+         else
+           update(key, data, ttl, [])
+           handle_each_error(errors)
+         end
+       rescue => e2
+         handle_error(e2) # bugs on bugs on bugs if you get here!
+       end
+     end
+
+     def handle_each_error(errors)
+       errors.uniq{|error| [error.message, error.backtrace]}.each do |error|
+         handle_error(error)
+       end
+     end
+   end
+ end
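
To see the whole pipeline in motion, here is a sketch that drives one of the concrete updaters inside an EventMachine reactor, backed by the in-memory mock client. The require path, option values, and key name are illustrative, not recommendations.

require 'eventmachine'
require 'metricstore'  # assumed require path for the gem

EM.run do
  incrementer = Metricstore::Incrementer.new(
    :kvstore                    => Metricstore::MockKeyValueClient.new,
    :max_retry_delay_in_seconds => 10,
    :max_healthy_errors         => 3,
    :max_unhandled_errors       => 5
  )
  incrementer.handle_update_result = lambda do |key, result, ttl|
    puts "#{key} is now #{result}"
  end
  incrementer.start!

  incrementer.increment("hits", 2)
  incrementer.increment("hits", 3)  # consolidated with the pending +2 before it is written

  EM.add_timer(1) do
    incrementer.stop!
    EM.stop
  end
end
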