metricstore 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,70 @@
1
+ require 'couchbase'
2
+
1
3
  module Metricstore
4
+ # Internal class. Use this class outside the gem at your own risk.
2
5
  class CouchbaseClient
3
- include BaseClient
4
6
 
5
7
  def initialize(*args, &callback)
6
8
  super
9
+ @connection = Couchbase.connect(*args, &callback)
10
+ end
11
+
12
+ # key: a string
13
+ # delta: an integer
14
+ # options:
15
+ # :ttl => Time-to-live (number of seconds from now).
16
+ # returns: [value, cas_version_id]
17
+ def increment(key, delta, opts={})
18
+ options = {:initial => delta, :extended => true}
19
+ options.merge(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
20
+ value, flags, cas = connection.incr(key, delta, options)
21
+ [value, cas]
22
+ end
23
+
24
+ # key: a string
25
+ # value: a marshalable object
26
+ # options:
27
+ # :ttl => Time-to-live (number of seconds from now).
28
+ # returns: cas_version_id, or nil if the key already exists.
29
+ def add(key, value, opts={})
30
+ options = {}
31
+ options.merge(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
32
+ connection.add(key, value, opts)
33
+ rescue Couchbase::Error::KeyExists => e
34
+ nil
35
+ end
7
36
 
8
- require 'couchbase'
9
- @couchbase = Couchbase.connect(*args, &callback)
37
+ # key: a string
38
+ # value: a marshalable object
39
+ # options:
40
+ # :ttl => Time-to-live (number of seconds from now).
41
+ # :cas => a version id (for optimistic concurrency control)
42
+ # returns: cas_version_id, or nil if the key already exists.
43
+ def set(key, value, opts={})
44
+ options = {}
45
+ options.merge(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
46
+ options.merge(:cas => opts[:cas]) if opts.include?(:cas)
47
+ connection.set(key, value, opts)
48
+ rescue Couchbase::Error::KeyExists => e
49
+ nil
50
+ end
51
+
52
+ # key: a string
53
+ # returns: [value, cas_version_id], or nil if the key doesn't exist.
54
+ def fetch(key, opts={})
55
+ options = {:extended => true, :quiet => true}
56
+ options.merge(:ttl => convert_ttl(opts[:ttl])) if opts.include?(:ttl)
57
+ value, flags, cas = connection.get(key, options)
58
+ value.nil? ? nil : [value, cas]
10
59
  end
11
60
 
12
61
  private
13
62
 
14
- attr_reader :couchbase
63
+ attr_reader :connection
64
+
65
+ def convert_ttl(ttl)
66
+ ttl.nil? ? nil : ttl.to_f <= 0 ? nil : (Time.now + ttl.to_f).to_f
67
+ end
68
+
15
69
  end
16
70
  end
@@ -0,0 +1,23 @@
1
module Metricstore
  # Updater that applies pending deltas with the key-value store's own
  # increment operation.
  class CountIncrementer < Updater

    # Queues +delta+ to be added to the counter at +key+.
    # A zero delta is ignored (returns nil without queueing anything).
    def increment(key, delta, ttl=nil)
      update(key, delta, ttl) unless delta.zero?
    end

    protected

    # Deltas are queued as given.
    def prepare_data(amount)
      amount
    end

    # Two pending deltas for the same key collapse into their sum.
    def consolidate_data(pending, incoming)
      pending + incoming
    end

    # Delegates to the store's increment, passing the ttl along.
    def handle_update(key, amount, ttl, errors)
      kvstore.increment(key, amount, :ttl => ttl)
    end
  end
end
@@ -0,0 +1,4 @@
1
module Metricstore
  # Error type defined by the gem; a plain RuntimeError subclass with no
  # additional behavior.
  class DataLossError < RuntimeError; end
end
@@ -0,0 +1,91 @@
1
module Metricstore
  # HyperLogLog cardinality estimation.
  #
  # Builder hashes items and reports (bucket index, rank) pairs to a
  # caller-supplied updater; estimate_cardinality turns the per-bucket
  # maximum ranks back into an estimate of the number of distinct items.
  module HyperLogLog

    # Width in bits of the hash produced by Builder#hash_of (SHA-512).
    HASH_BIT_SIZE = 512

    # Bias-correction constants, keyed by bucket count m.
    ALPHA = {}
    ALPHA[16] = 0.673 # m = 2**4
    ALPHA[32] = 0.697 # m = 2**5
    ALPHA[64] = 0.709 # m = 2**6
    (7 .. 16).each do |b|
      m = 2 ** b
      # Published constant for m >= 128 is 0.7213 / (1 + 1.079 / m).
      ALPHA[m] = 0.7213/(1 + 1.079/m)
    end

    class Builder
      require 'digest/sha2'

      attr_reader :bucket_count

      # error_rate: target relative error, strictly between 0 and 1.
      # bucket_updater must have a method named "call" which takes two arguments
      # the bucket index, and an integer value (of which it will track the max
      # value per bucket).
      #
      # Raises ArgumentError if error_rate is out of range or would need more
      # index bits than the hash can spare.
      def initialize(error_rate, bucket_updater)
        @error_rate = error_rate
        unless @error_rate > 0 && @error_rate < 1
          raise(ArgumentError, "error_rate must be between 0 and 1")
        end
        @bits = HyperLogLog.bits_needed(error_rate)
        unless (@bits + 10) <= HASH_BIT_SIZE
          raise(ArgumentError, "error_rate is unattainable. be less picky.")
        end
        @bucket_count = 1 << @bits
        @alpha = ALPHA[@bucket_count]
        @bucket_updater = bucket_updater
      end

      # Hashes +item+, uses the top @bits bits of the hash as the bucket
      # index, and reports the rank of the remaining bits to the updater.
      #
      # The rank is the 1-based position of the most significant set bit
      # among the remaining bits (1 when the very first of them is set), or
      # one more than their count when none is set.
      def add(item)
        hashed = hash_of(item)
        offset = HASH_BIT_SIZE - @bits
        bucket_index = hashed >> offset
        raise("BUG!") if bucket_index >= @bucket_count
        back_bits = hashed & ((1 << offset) - 1)
        # Scan from the most significant remaining bit (offset - 1) down to
        # bit 0, inclusive.
        rank = (1 .. offset).find { |position| back_bits[offset - position] == 1 }
        rank ||= offset + 1
        @bucket_updater.call(bucket_index, rank)
      end

      private

      # SHA-512 digest of item.to_s, as a non-negative Integer.
      def hash_of(item)
        sha = Digest::SHA2.new(HASH_BIT_SIZE)
        sha << item.to_s
        sha.to_s.to_i(16)
      end
    end

    # Number of index bits (log2 of the bucket count) used for the given
    # target error rate.
    def self.bits_needed(error_rate)
      Math.log((1.04 / error_rate) ** 2, 2).round
    end

    # buckets: a collection (responding to to_a) holding one entry per
    # bucket: the maximum rank seen for that bucket, or nil if the bucket
    # was never touched.
    #
    # Returns the estimated number of distinct items.
    # Raises ArgumentError when the number of buckets has no ALPHA entry.
    def self.estimate_cardinality(buckets)
      values = buckets.to_a
      m = values.size
      raise("BUG!") unless m > 0
      alpha = ALPHA[m] || raise(ArgumentError, "unsupported bucket count: #{m}")
      raw = alpha * (m ** 2) / values.map{|x| 2 ** -(x || 0)}.inject(:+)
      if raw <= 2.5 * m
        # correct for being below ideal range
        zero_registers = values.count(nil)
        if zero_registers == 0
          raw
        else
          m * Math.log(m.to_f / zero_registers)
        end
      elsif raw <= (2 ** HASH_BIT_SIZE) / 30.0
        # ideal range
        raw
      else
        # correct for being beyond ideal range (hash collisions); this
        # correction uses the natural logarithm, like the small-range one.
        (-2 ** HASH_BIT_SIZE) * Math.log(1 - raw.to_f/(2**HASH_BIT_SIZE))
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Metricstore
  # Updater that adds pending deltas to a stored value using
  # fetch + add/set with optimistic concurrency (CAS), rather than the
  # store's own increment operation.
  class Incrementer < Updater

    # Queues +delta+ to be added to the value at +key+.
    # A zero delta is ignored.
    def increment(key, delta, ttl=nil)
      return if delta.zero?
      update(key, delta, ttl)
    end

    protected

    # Deltas are queued as given.
    def prepare_data(delta)
      delta
    end

    # Two pending deltas for the same key collapse into their sum.
    def consolidate_data(delta1, delta2)
      delta1 + delta2
    end

    # Returns the new stored value when the write succeeded, or nil when
    # another writer got in first; in that case the delta is re-queued via
    # retry_update.
    def handle_update(key, delta, ttl, errors)
      stored_value, cas = kvstore.fetch(key, :ttl => ttl)
      if stored_value.nil?
        return delta if kvstore.add(key, delta, :ttl => ttl)
      else
        new_value = stored_value + delta
        return new_value if kvstore.set(key, new_value, :ttl => ttl, :cas => cas)
      end
      # collision: re-queue the same delta (not the computed sum), since the
      # stored value has to be re-read on the next attempt.
      retry_update(key, delta, ttl, errors)
      nil
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'set'
2
+
3
module Metricstore
  # Updater that maintains a bounded list of distinct values per key.
  # Once a list would grow beyond list_threshold, the key is overwritten
  # with the marker string 'overflow' and further inserts are ignored.
  class Inserter < Updater

    # Marker stored in place of a list that has outgrown list_threshold.
    OVERFLOW = 'overflow'.freeze

    # Queues +values+ (a single value or a collection) for insertion into
    # the list at +key+.
    def insert(key, values, ttl=nil)
      # Wrapping in Array keeps a caller-supplied 'overflow' string an
      # ordinary value; only internal retries pass the bare marker.
      update(key, Array(values), ttl)
    end

    def list_threshold=(threshold)
      @list_threshold = threshold
    end

    # Maximum list size before overflow; defaults to 100.
    def list_threshold
      @list_threshold ||= 100
    end

    protected

    # Normalizes queued data to a Set. The overflow marker passes through
    # untouched so that a retried overflow is not mistaken for a value.
    def prepare_data(values)
      return values if values == OVERFLOW
      Set.new(Array(values))
    end

    # Merges two pending payloads; the result is the overflow marker if
    # either side already overflowed or the union exceeds list_threshold.
    def consolidate_data(values1, values2)
      return OVERFLOW if values1 == OVERFLOW || values2 == OVERFLOW
      consolidated = values1 + values2
      return OVERFLOW if consolidated.size > list_threshold
      consolidated
    end

    # Returns a list of the values that were newly inserted, or else nil
    # if there was contention, and we have to retry.
    def handle_update(key, values, ttl, errors)
      return [] if values.nil? || values.empty?
      #TODO: there's room here for a local cache optimization
      list, cas = kvstore.fetch(key, :ttl => ttl)
      if list.nil?
        if values == OVERFLOW || values.size > list_threshold
          return [] if kvstore.add(key, OVERFLOW, :ttl => ttl)
          # collision
          retry_update(key, OVERFLOW, ttl, errors)
          return nil
        end
        return values if kvstore.add(key, values.to_a, :ttl => ttl)
        # collision
        retry_update(key, values, ttl, errors)
        return nil
      end

      return [] if list == OVERFLOW

      if values == OVERFLOW
        # Pending data overflowed before it was written while the stored
        # list has not: mark the stored key as overflowed too.
        return [] if kvstore.set(key, OVERFLOW, :cas => cas, :ttl => ttl)
        # collision
        retry_update(key, OVERFLOW, ttl, errors)
        return nil
      end

      list = Set.new(list)
      values = values.reject{ |v| list.include?(v) }
      return [] if values.empty?
      new_list = values + list.to_a
      if new_list.size > list_threshold
        return [] if kvstore.set(key, OVERFLOW, :cas => cas, :ttl => ttl)
        # collision
        retry_update(key, OVERFLOW, ttl, errors)
        return nil
      end
      return values if kvstore.set(key, new_list, :cas => cas, :ttl => ttl)
      # collision
      retry_update(key, values, ttl, errors)
      nil
    end
  end
end
@@ -0,0 +1,39 @@
1
module Metricstore
  # Internal class. Use this class outside the gem at your own risk.
  # TTL is ignored. Not thread-safe. For testing purposes only.
  #
  # In-memory stand-in for a key-value client, backed by a Hash. The stored
  # value itself doubles as the "cas version id".
  class MockKeyValueClient

    # Arguments are accepted and ignored.
    def initialize(*args, &callback)
      @store = {}
    end

    # Adds delta to the value at key (or stores delta if the key is absent).
    # returns: [value, value]
    def increment(key, delta, opts={})
      @store[key] = @store.include?(key) ? @store[key] + delta : delta
      current = @store[key]
      [current, current]
    end

    # Stores value only if key is absent.
    # returns: [value, value], or nil if the key already exists.
    def add(key, value, opts={})
      unless @store.include?(key)
        @store[key] = value
        [value, value]
      end
    end

    # Stores value; when opts[:cas] is given it must equal the current
    # value, otherwise nothing is written.
    # returns: value, or nil on a cas mismatch.
    def set(key, value, opts={})
      expected = opts[:cas]
      mismatch = expected && expected != @store[key]
      mismatch ? nil : (@store[key] = value)
    end

    # returns: [value, value], or nil if nothing is stored under key.
    def fetch(key, opts={})
      found = @store[key]
      [found, found] unless found.nil?
    end

    def to_s
      "MockKeyValueClient: #{@store.inspect}"
    end
  end
end
@@ -0,0 +1,27 @@
1
module Metricstore
  # Mixin adding #all_combinations to collections that respond to
  # to_a and size.
  module AllCombinations
    # Yields every combination of the receiver's elements, from the empty
    # combination up to the full set, grouped by increasing length.
    # Without a block, returns an Enumerator over the same sequence.
    def all_combinations
      unless block_given?
        return Enumerator.new do |yielder|
          elements = self.to_a
          0.upto(size) do |length|
            elements.combination(length) { |subset| yielder << subset }
          end
        end
      end

      elements = self.to_a
      0.upto(size) do |length|
        elements.combination(length) { |subset| yield subset }
      end
    end
  end
end

# Makes #all_combinations available on every Array.
class Array
  include Metricstore::AllCombinations
end
@@ -0,0 +1,58 @@
1
module Metricstore
  # Updater that tracks the [min, max] range of the numeric values seen
  # for each key, using fetch + add/set with optimistic concurrency (CAS).
  class RangeUpdater < Updater

    # Queues +value+ to be folded into the range stored at +key+.
    # Raises ArgumentError unless value is Numeric.
    def update_range(key, value, ttl=nil)
      raise(ArgumentError, "value must be numeric") unless value.is_a?(Numeric)
      update(key, [value, value], ttl)
    end

    protected

    # Queued data is already a [min, max] pair.
    def prepare_data(min_max)
      min_max
    end

    # Two pending ranges for the same key collapse into their envelope.
    def consolidate_data(min_max1, min_max2)
      [min(min_max1[0], min_max2[0]), max(min_max1[1], min_max2[1])]
    end

    # Returns nil if there was contention, and we have to retry.
    # Returns [:new, range] where range is (max - min), if range was added.
    # Otherwise returns [:grew, diff] where diff is the amount the range grew.
    # NOTE(review): when the stored range already covers the update, the
    # plain Integer 0 is returned rather than [:grew, 0]; kept as-is because
    # callers are not visible here -- confirm which shape they expect.
    def handle_update(key, min_max, ttl, errors)
      #TODO: there's room here for a local cache optimization
      stored_min_max, cas = kvstore.fetch(key, :ttl => ttl)
      if stored_min_max.nil?
        if kvstore.add(key, min_max, :ttl => ttl)
          return [:new, (min_max[1] - min_max[0])]
        else
          # collision
          retry_update(key, min_max, ttl, errors)
          return nil
        end
      else
        stored_min, stored_max = stored_min_max
        new_min = min(stored_min, min_max[0])
        new_max = max(stored_max, min_max[1])
        return 0 if new_min == stored_min && new_max == stored_max
        if kvstore.set(key, [new_min, new_max], :ttl => ttl, :cas => cas)
          return [:grew, (stored_min - new_min) + (new_max - stored_max)]
        else
          # collision
          retry_update(key, min_max, ttl, errors)
          return nil
        end
      end
    end

    private

    # Smaller of a and b; a nil argument is ignored in favor of the other.
    def min(a, b)
      return b if a.nil?
      return a if b.nil?
      (a < b) ? a : b
    end

    # Larger of a and b; a nil argument is ignored in favor of the other.
    def max(a, b)
      return b if a.nil?
      return a if b.nil?
      (a < b) ? b : a
    end
  end
end
@@ -0,0 +1,192 @@
1
+ require 'eventmachine'
2
+
3
module Metricstore

  # Abstract class. Not thread-safe.
  #
  # Buffers updates per key, consolidating updates that target the same key,
  # and applies them in small batches from EventMachine callbacks.
  #
  # Sub-classes must implement (protected) methods:
  #
  #   prepare_data(data)
  #
  #   consolidate_data(data1, data2)
  #
  #   handle_update(key, data, ttl, errors)
  #     -> must return a truthy value if and only if the update occurred.
  #
  class Updater

    # opts:
    #   :sleep_interval - sleep cycle length in seconds (default: 0.1).
    #   :kvstore - the underlying key-value store.
    #   :max_retry_delay_in_seconds - maximum length of time to wait after an error.
    #   :max_unhandled_errors - maximum number of retries before handling errors. Set this >= max_healthy_errors.
    #   :max_healthy_errors - maximum number of retries before healthy? returns false. Set this <= max_unhandled_errors.
    #
    # Raises RuntimeError when a required option is missing.
    def initialize(opts={})
      @sleep_interval = (opts[:sleep_interval] || 0.1).to_f
      @kvstore = required(opts, :kvstore)
      @max_retry_delay = required(opts, :max_retry_delay_in_seconds).to_f
      @max_unhandled_errors = required(opts, :max_unhandled_errors).to_i
      @max_healthy_errors = required(opts, :max_healthy_errors).to_i

      @timer = nil
      @running = false
      @healthy = nil
      @pending_updates = {}
    end

    # Starts the processing loop on the next reactor tick. No-op when
    # already running.
    def start!
      return if @running
      @running = true
      EM.next_tick { process! }
    end

    # Be sure to call this after tests, when you want to let go of the object.
    def stop!
      @running = false
      timer = @timer
      if timer
        EM.cancel_timer(timer)
        @timer = nil
      end
    end

    # False only after an update has accumulated more than
    # max_healthy_errors errors; reset to true by the next successful update.
    def healthy?
      @healthy != false
    end

    # Approximate length of the queue
    def backlog
      @pending_updates.size
    end

    # Optional callable invoked as call(key, result, ttl) after each update
    # for which handle_update returned a non-nil result.
    attr_accessor :handle_update_result

    protected

    attr_reader :kvstore

    def required(opts, parameter_name)
      opts[parameter_name] || raise("Missing parameter: #{parameter_name}")
    end

    # Re-queues an update after a collision; same as update.
    def retry_update(key, data, ttl=nil, errors=[])
      update(key, data, ttl, errors)
    end

    # Schedules the update to be added to the pending queue: on the next
    # tick when it carries no errors, after a back-off delay otherwise.
    def update(key, data, ttl=nil, errors=[])
      schedule(errors) do
        pend_update(key, data, ttl, errors)
      end
    end

    def consolidate_data(data1, data2)
      raise NotImplementedError
    end

    def prepare_data(data)
      raise NotImplementedError
    end

    # Sub-classes may want to over-ride this.
    #
    # Reports +e+ to Airbrake or Exceptional when one of them is loaded and
    # enabled; otherwise prints it and raises it.
    def handle_error(e)
      if defined?(Airbrake) && Airbrake.configuration.environment_name && Airbrake.configuration.public?
        Airbrake.notify(e)
      elsif defined?(Exceptional) && Exceptional::Config.should_send_to_api?
        Exceptional.handle(e)
      else
        puts e.inspect
        # Raise the error we were given; a bare `raise` would re-raise
        # whatever exception is current, which need not be this one.
        raise e
      end
    end

    private

    attr_reader :sleep_interval
    attr_reader :max_healthy_errors
    attr_reader :max_retry_delay
    attr_reader :max_unhandled_errors

    def schedule(errors=[], &callback)
      if errors.size == 0
        EM.next_tick { callback.call }
      else
        EM.add_timer(retry_delay_for(errors)) { callback.call }
      end
    end

    # Exponential back-off in the number of errors, capped at half of
    # max_retry_delay, then scaled by a random factor in [1, 2).
    def retry_delay_for(errors)
      [2 ** (errors.size - 4), max_retry_delay / 2.0].min * (1 + rand)
    end

    # This must only be called by the EM reactor thread
    def pend_update(key, data, ttl, errors)
      pending = @pending_updates[key]
      if pending.nil?
        @pending_updates[key] = {:data => prepare_data(data), :ttl => ttl, :errors => errors}
      else
        pending[:data] = consolidate_data(pending[:data], prepare_data(data))
        pending[:ttl] = max_ttl(ttl, pending[:ttl])
        pending[:errors] += errors
      end
    rescue => e
      handle_error(e)
    end

    # Larger of two ttls; a nil or 0 on either side wins and yields 0.
    # NOTE(review): presumably nil/0 mean "no expiry" -- confirm.
    def max_ttl(a, b)
      return 0 if a.nil? || b.nil? || a == 0 || b == 0
      [a,b].max
    end

    # Arranges the next process! run (if still running): after
    # sleep_interval when sleep_first is true, on the next tick otherwise.
    def reschedule_process!(sleep_first)
      if @running
        timer = @timer
        EM.cancel_timer(timer) if timer
        if sleep_first
          @timer = EM.add_timer(sleep_interval) { process! }
        else
          EM.next_tick { process! }
        end
      end
    end

    # Applies up to 10 pending updates, then reschedules itself.
    def process!
      @timer = nil
      processed = 0
      until @pending_updates.empty? || (processed+=1) > 10
        key, entry = @pending_updates.shift
        process_update(key, entry[:data], entry[:ttl], entry[:errors] || [])
      end
      reschedule_process!(@pending_updates.empty?)
    rescue => e
      begin
        handle_error(e)
      ensure
        # Keep the loop alive: without this, a single error escaping the
        # batch would stop all further processing while still "running".
        reschedule_process!(true)
      end
    end

    def process_update(key, data, ttl, errors)
      result = handle_update(key, data, ttl, errors)
      unless result.nil?
        @healthy = true
        handle_update_result.call(key, result, ttl) if handle_update_result
      end
    rescue => e
      # Uh oh. We stick the update back in the queue before handling the error.
      begin
        errors << e
        @healthy = false if errors.size > max_healthy_errors
        if errors.size <= max_unhandled_errors
          update(key, data, ttl, errors)
        else
          update(key, data, ttl, [])
          handle_each_error(errors)
        end
      rescue => e2
        handle_error(e2) # bugs on bugs on bugs if you get here!
      end
    end

    # Reports each distinct error (by message and backtrace) once.
    def handle_each_error(errors)
      errors.uniq{|error| [error.message, error.backtrace]}.each do |error|
        handle_error(error)
      end
    end
  end
end