promiscuous 0.90.0 → 0.91.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/lib/promiscuous/amqp/bunny.rb +63 -36
  3. data/lib/promiscuous/amqp/fake.rb +3 -1
  4. data/lib/promiscuous/amqp/hot_bunnies.rb +26 -16
  5. data/lib/promiscuous/amqp/null.rb +1 -0
  6. data/lib/promiscuous/amqp.rb +12 -12
  7. data/lib/promiscuous/cli.rb +70 -29
  8. data/lib/promiscuous/config.rb +54 -29
  9. data/lib/promiscuous/convenience.rb +1 -1
  10. data/lib/promiscuous/dependency.rb +25 -6
  11. data/lib/promiscuous/error/connection.rb +11 -9
  12. data/lib/promiscuous/error/dependency.rb +8 -1
  13. data/lib/promiscuous/loader.rb +4 -2
  14. data/lib/promiscuous/publisher/bootstrap/connection.rb +25 -0
  15. data/lib/promiscuous/publisher/bootstrap/data.rb +127 -0
  16. data/lib/promiscuous/publisher/bootstrap/mode.rb +19 -0
  17. data/lib/promiscuous/publisher/bootstrap/status.rb +40 -0
  18. data/lib/promiscuous/publisher/bootstrap/version.rb +46 -0
  19. data/lib/promiscuous/publisher/bootstrap.rb +27 -0
  20. data/lib/promiscuous/publisher/context/base.rb +67 -0
  21. data/lib/promiscuous/{middleware.rb → publisher/context/middleware.rb} +16 -13
  22. data/lib/promiscuous/publisher/context/transaction.rb +36 -0
  23. data/lib/promiscuous/publisher/context.rb +4 -88
  24. data/lib/promiscuous/publisher/mock_generator.rb +9 -9
  25. data/lib/promiscuous/publisher/model/active_record.rb +7 -7
  26. data/lib/promiscuous/publisher/model/base.rb +29 -29
  27. data/lib/promiscuous/publisher/model/ephemeral.rb +5 -3
  28. data/lib/promiscuous/publisher/model/mock.rb +9 -5
  29. data/lib/promiscuous/publisher/model/mongoid.rb +5 -22
  30. data/lib/promiscuous/publisher/operation/active_record.rb +360 -0
  31. data/lib/promiscuous/publisher/operation/atomic.rb +167 -0
  32. data/lib/promiscuous/publisher/operation/base.rb +279 -474
  33. data/lib/promiscuous/publisher/operation/mongoid.rb +153 -145
  34. data/lib/promiscuous/publisher/operation/non_persistent.rb +28 -0
  35. data/lib/promiscuous/publisher/operation/proxy_for_query.rb +42 -0
  36. data/lib/promiscuous/publisher/operation/transaction.rb +85 -0
  37. data/lib/promiscuous/publisher/operation.rb +1 -1
  38. data/lib/promiscuous/publisher/worker.rb +7 -7
  39. data/lib/promiscuous/publisher.rb +1 -1
  40. data/lib/promiscuous/railtie.rb +20 -5
  41. data/lib/promiscuous/redis.rb +104 -56
  42. data/lib/promiscuous/subscriber/message_processor/base.rb +38 -0
  43. data/lib/promiscuous/subscriber/message_processor/bootstrap.rb +17 -0
  44. data/lib/promiscuous/subscriber/message_processor/regular.rb +192 -0
  45. data/lib/promiscuous/subscriber/message_processor.rb +4 -0
  46. data/lib/promiscuous/subscriber/model/base.rb +20 -15
  47. data/lib/promiscuous/subscriber/model/mongoid.rb +4 -4
  48. data/lib/promiscuous/subscriber/model/observer.rb +16 -2
  49. data/lib/promiscuous/subscriber/operation/base.rb +68 -0
  50. data/lib/promiscuous/subscriber/operation/bootstrap.rb +54 -0
  51. data/lib/promiscuous/subscriber/operation/regular.rb +13 -0
  52. data/lib/promiscuous/subscriber/operation.rb +3 -166
  53. data/lib/promiscuous/subscriber/worker/message.rb +61 -35
  54. data/lib/promiscuous/subscriber/worker/message_synchronizer.rb +90 -59
  55. data/lib/promiscuous/subscriber/worker/pump.rb +17 -5
  56. data/lib/promiscuous/subscriber/worker/recorder.rb +4 -1
  57. data/lib/promiscuous/subscriber/worker/runner.rb +49 -9
  58. data/lib/promiscuous/subscriber/worker/stats.rb +2 -2
  59. data/lib/promiscuous/subscriber/worker.rb +6 -0
  60. data/lib/promiscuous/subscriber.rb +1 -1
  61. data/lib/promiscuous/timer.rb +31 -18
  62. data/lib/promiscuous/version.rb +1 -1
  63. data/lib/promiscuous.rb +23 -3
  64. metadata +104 -89
  65. data/lib/promiscuous/subscriber/payload.rb +0 -34
@@ -1,41 +1,31 @@
1
1
  class Promiscuous::Publisher::Operation::Base
2
- class TryAgain < RuntimeError; end
3
- VERSION_FIELD = '_pv'
2
+ mattr_accessor :recovery_mechanisms
3
+ self.recovery_mechanisms = []
4
4
 
5
- attr_accessor :operation, :operation_ext, :instance, :selector_keys
6
-
7
- def initialize(options={})
8
- # XXX instance is not always an instance, it can be a selector
9
- # representation.
10
- @instance = options[:instance]
11
- @operation = options[:operation]
12
- @operation_ext = options[:operation_ext]
13
- @multi = options[:multi]
5
+ def self.register_recovery_mechanism(method_name=nil, &block)
6
+ self.recovery_mechanisms << (block || method(method_name))
14
7
  end
15
8
 
16
- def read?
17
- operation == :read
9
+ def self.run_recovery_mechanisms
10
+ self.recovery_mechanisms.each(&:call)
18
11
  end
19
12
 
20
- def write?
21
- !read?
22
- end
13
+ attr_accessor :operation
23
14
 
24
- def multi?
25
- !!@multi
15
+ def initialize(options={})
16
+ @operation = options[:operation]
26
17
  end
27
18
 
28
- def single?
29
- !@multi
19
+ def read?
20
+ @operation == :read
30
21
  end
31
22
 
32
- def persists?
33
- # TODO For writes in transactions, it should be false
34
- write?
23
+ def write?
24
+ !read?
35
25
  end
36
26
 
37
- def failed?
38
- !!@exception
27
+ def recovering?
28
+ !!@recovering
39
29
  end
40
30
 
41
31
  def current_context
@@ -67,7 +57,7 @@ class Promiscuous::Publisher::Operation::Base
67
57
  end
68
58
 
69
59
  def publish_payload_in_rabbitmq_async
70
- Promiscuous::AMQP.publish(:key => @amqp_key, :payload => @payload,
60
+ Promiscuous::AMQP.publish(:key => Promiscuous::Config.app, :payload => @payload,
71
61
  :on_confirm => method(:on_rabbitmq_confirm))
72
62
  end
73
63
 
@@ -87,33 +77,37 @@ class Promiscuous::Publisher::Operation::Base
87
77
  node.zadd(rabbitmq_staging_set_key, Time.now.to_i, key)
88
78
  payload = node.get(key)
89
79
 
90
- Promiscuous.info "[payload recovery] #{payload}"
91
- new.instance_eval do
92
- @payload_recovery_node = node
93
- @payload_recovery_key = key
94
- @amqp_key = MultiJson.load(payload)['__amqp__']
95
- @payload = payload
96
- publish_payload_in_rabbitmq_async
80
+ # It's possible that the payload is nil as the message could be
81
+ # recovered by another worker
82
+ if payload
83
+ Promiscuous.info "[payload recovery] #{payload}"
84
+ new.instance_eval do
85
+ @payload_recovery_node = node
86
+ @payload_recovery_key = key
87
+ @payload = payload
88
+ @recovery = true
89
+ publish_payload_in_rabbitmq_async
90
+ end
97
91
  end
98
92
  end
99
93
  end
100
94
  end
95
+ register_recovery_mechanism :recover_payloads_for_rabbitmq
101
96
 
102
97
  def publish_payload_in_redis
103
98
  # TODO Optimize and DRY this up
104
99
  r = @committed_read_deps
105
100
  w = @committed_write_deps
106
101
 
107
- master_node = w.first.redis_node
108
- operation_recovery_key = w.first.key(:pub).join('operation_recovery').to_s
109
- # We identify a payload with a unique key (id:id_value:current_version) to
110
- # avoid collisions with other updates on the same document.
102
+ # We identify a payload with a unique key (id:id_value:current_version:payload_recovery)
103
+ # to avoid collisions with other updates on the same document.
104
+ master_node = @op_lock.node
111
105
  @payload_recovery_node = master_node
112
- @payload_recovery_key = w.first.key(:pub).join(w.first.version).to_s
106
+ @payload_recovery_key = Promiscuous::Key.new(:pub).join('payload_recovery', @op_lock.token).to_s
113
107
 
114
108
  # We need to be able to recover from a redis failure. By sending the
115
109
  # payload to the slave first, we ensure that we can replay the lost
116
- # payloads if the primary came to fail.
110
+ # payloads if the master came to fail.
117
111
  # We still need to recover the lost operations. This can be done by doing a
118
112
  # version diff from what is stored in the database and the recovered redis slave.
119
113
  # XXX TODO
@@ -123,113 +117,108 @@ class Promiscuous::Publisher::Operation::Base
123
117
  # happen if we lost the lock without knowing about it.
124
118
  # The payload can be sent twice, which is okay since the subscribers
125
119
  # tolerate it.
120
+ operation_recovery_key = "#{@op_lock.key}:operation_recovery"
121
+ versions_recovery_key = "#{operation_recovery_key}:versions"
126
122
 
127
- nodes = (w+r).map(&:redis_node).uniq
128
- if nodes.size == 1
129
- # We just have the master node. Since we are atomic, we don't need to do
130
- # the 2pc dance.
131
- master_node.multi do
132
- master_node.del(operation_recovery_key)
133
- master_node.set(@payload_recovery_key, @payload)
134
- master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
135
- end
136
- else
137
- master_node.multi do
138
- master_node.set(@payload_recovery_key, @payload)
139
- master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
140
- end
141
-
142
- # The payload is safe now. We can cleanup all the versions on the
143
- # secondary. Note that we need to clear the master node at the end,
144
- # as it acts as a lock on the other keys. This is important to avoid a
145
- # race where we would delete data that doesn't belong to the current
146
- # operation due to a lock loss.
147
- nodes.reject { |node| node == master_node }
148
- .each { |node| node.del(operation_recovery_key) }
123
+ master_node.multi do
124
+ master_node.set(@payload_recovery_key, @payload)
125
+ master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
149
126
  master_node.del(operation_recovery_key)
127
+ master_node.del(versions_recovery_key)
150
128
  end
129
+
130
+ # The payload is safe now. We can cleanup all the versions on the
131
+ # secondary. There are no harmful races that can happen since the
132
+ # secondary_operation_recovery_key is unique to the operation.
133
+ # XXX The caveat is that if we die here, the
134
+ # secondary_operation_recovery_key will never be cleaned up.
135
+ (w+r).map(&:redis_node).uniq
136
+ .reject { |node| node == master_node }
137
+ .each { |node| node.del(versions_recovery_key) }
151
138
  end
152
139
 
153
- def generate_payload_and_clear_operations
154
- # TODO Transactions with multi writes
155
- raise "We don't support multi writes yet" if previous_successful_operations.select(&:write?).size > 1
156
- raise "The instance is gone, or there is a version mismatch" unless @instance
140
+ def payload_for(instance)
141
+ options = { :with_attributes => self.operation.in?([:create, :update]) }
142
+ instance.promiscuous.payload(options).tap do |payload|
143
+ payload[:operation] = self.operation
144
+ end
145
+ end
157
146
 
158
- payload = @instance.promiscuous.payload(:with_attributes => operation.in?([:create, :update]))
147
+ def generate_payload
148
+ payload = {}
149
+ payload[:operations] = operation_payloads
159
150
  payload[:context] = current_context.name
151
+ payload[:app] = Promiscuous::Config.app
160
152
  payload[:timestamp] = @timestamp
161
-
162
- # If the db operation has failed, so we publish a dummy operation on the
163
- # failed instance. It's better than using the Dummy polisher class
164
- # because a subscriber can choose not to receive any of these messages.
165
- payload[:operation] = self.failed? ? :dummy : operation
166
-
167
- # We need to consider the last write operation as an implicit read
168
- # dependency. This is why we don't need to consider the read dependencies
169
- # happening before a first write when publishing the second write in a
170
- # context.
153
+ payload[:host] = Socket.gethostname
154
+ payload[:current_user_id] = Thread.current[:promiscuous_context].try(:current_user_id)
171
155
  payload[:dependencies] = {}
172
156
  payload[:dependencies][:read] = @committed_read_deps if @committed_read_deps.present?
173
157
  payload[:dependencies][:write] = @committed_write_deps
174
158
 
175
- current_context.last_write_dependency = @committed_write_deps.first
176
- current_context.operations.clear
177
-
178
- @amqp_key = payload[:__amqp__]
179
159
  @payload = MultiJson.dump(payload)
180
160
  end
181
161
 
162
+ def clear_previous_dependencies
163
+ current_context.read_operations.clear
164
+ current_context.extra_dependencies = [@committed_write_deps.first]
165
+ end
166
+
182
167
  def self.recover_operation_from_lock(lock)
183
168
  # We happen to have acquired a never released lock.
184
169
  # The database instance is thus still pristine.
185
- # Three cases to consider:
186
- # 1) the key is not an id dependency or the payload queue stage was passed
187
- # 2) The write query was never executed, we must send a dummy operation
188
- # 3) The write query was executed, but never passed the payload queue stage
189
170
 
190
171
  master_node = lock.node
191
- recovery_data = master_node.hgetall("#{lock.key}:operation_recovery")
192
- return nil unless recovery_data.present? # case 1)
172
+ recovery_data = master_node.get("#{lock.key}:operation_recovery")
173
+
174
+ unless recovery_data.present?
175
+ lock.unlock
176
+ return
177
+ end
193
178
 
194
179
  Promiscuous.info "[operation recovery] #{lock.key} -> #{recovery_data}"
195
180
 
196
- collection, instance_id, operation,
197
- document, read_dependencies, write_dependencies = *MultiJson.load(recovery_data['payload'])
181
+ op_klass, operation, read_dependencies,
182
+ write_dependencies, recovery_arguments = *MultiJson.load(recovery_data)
198
183
 
199
184
  operation = operation.to_sym
200
- read_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s) }
201
- write_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s) }
185
+ read_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s, :type => :read) }
186
+ write_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s, :type => :write) }
202
187
 
203
- model = Promiscuous::Publisher::Model.publishers[collection]
204
-
205
- if model.is_a? Promiscuous::Publisher::Model::Ephemeral
206
- operation = :dummy
207
- else
208
- # TODO Abstract db operations.
209
- # We need to query on the root model
210
- model = model.collection.name.singularize.camelize.constantize
188
+ begin
189
+ op = op_klass.constantize.recover_operation(*recovery_arguments)
190
+ rescue NameError
191
+ raise "invalid recover operation class: #{op_klass}"
211
192
  end
212
193
 
213
- op_klass = model.get_operation_class_for(operation)
214
- op = op_klass.recover_operation(model, instance_id, document)
215
- op.operation = operation
216
-
217
- Promiscuous.context :operation_recovery, :detached_from_parent => true do
218
- op.instance_eval do
219
- @read_dependencies = read_dependencies
220
- @write_dependencies = write_dependencies
221
- @locks = [lock]
222
- execute_persistent_locked { recover_db_operation }
194
+ Thread.new do
195
+ # We run the recovery in another thread to ensure that we get a new
196
+ # database connection to avoid tempering with the current state of the
197
+ # connection, which can be in an open transaction.
198
+ # Thankfully, we are not in a fast path.
199
+ # Note that any exceptions will be passed through the thread join() method.
200
+ Promiscuous.context :operation_recovery do
201
+ op.instance_eval do
202
+ @operation = operation
203
+ @read_dependencies = read_dependencies
204
+ @write_dependencies = write_dependencies
205
+ @op_lock = lock
206
+ @recovering = true
207
+
208
+ query = Promiscuous::Publisher::Operation::ProxyForQuery.new(self) { recover_db_operation }
209
+ execute_instrumented(query)
210
+ query.result
211
+ end
223
212
  end
224
- end
213
+ end.join
225
214
 
226
- lock.unlock
227
215
  rescue Exception => e
228
- message = "cannot recover #{lock.key} -> #{recovery_data}"
216
+ message = "cannot recover #{lock.key}, failed to fetch recovery data"
217
+ message = "cannot recover #{lock.key}, recovery data: #{recovery_data}" if recovery_data
229
218
  raise Promiscuous::Error::Recovery.new(message, e)
230
219
  end
231
220
 
232
- def increment_read_and_write_dependencies(read_dependencies, write_dependencies)
221
+ def increment_read_and_write_dependencies
233
222
  # We collapse all operations, ignoring the read/write interleaving.
234
223
  # It doesn't matter since all write operations are serialized, so the first
235
224
  # write in the transaction can have all the read dependencies.
@@ -241,92 +230,166 @@ class Promiscuous::Publisher::Operation::Base
241
230
  # r and w is empty) when it calculates the happens before relationships.
242
231
  r -= w
243
232
 
244
- master_node = w.first.redis_node
245
- operation_recovery_key = w.first
233
+ master_node = @op_lock.node
234
+ operation_recovery_key = "#{@op_lock.key}:operation_recovery"
246
235
 
247
236
  # We group all the dependencies by their respective shards
248
237
  # The master node will have the responsibility to hold the recovery data.
249
238
  # We do the master node first. The secondaries can be done in parallel.
250
- (w+r).group_by(&:redis_node).each do |node, deps|
251
- r_deps = deps.select { |dep| dep.in? r }
252
- w_deps = deps.select { |dep| dep.in? w }
239
+ @committed_read_deps = []
240
+ @committed_write_deps = []
241
+
242
+ # We need to do the increments always in the same node order, otherwise,
243
+ # the subscriber can deadlock. But we must always put the recovery payload
244
+ # on the master before touching anything.
245
+ nodes_deps = (w+r).group_by(&:redis_node)
246
+ .sort_by { |node, deps| -Promiscuous::Redis.master.nodes.index(node) }
247
+ if nodes_deps.first[0] != master_node
248
+ nodes_deps = [[master_node, []]] + nodes_deps
249
+ end
253
250
 
251
+ nodes_deps.each do |node, deps|
254
252
  argv = []
255
253
  argv << Promiscuous::Key.new(:pub) # key prefixes
256
- argv << MultiJson.dump([r_deps, w_deps])
254
+ argv << operation_recovery_key
255
+
256
+ # The index of the first write is then used to pass to redis along with the
257
+ # dependencies. This is done because arguments to redis LUA scripts cannot
258
+ # accept complex data types.
259
+ argv << (deps.index(&:read?) || deps.length)
257
260
 
258
261
  # Each shard has its own recovery payload. The master recovery node
259
262
  # has the full operation recovery, and the others just have their versions.
260
- argv << operation_recovery_key.as_json
261
- if node == master_node
263
+ # Note that the operation_recovery_key on the secondaries have the current
264
+ # version of the instance appended to them. It's easier to cleanup when
265
+ # locks get lost.
266
+ if node == master_node && !self.recovering?
262
267
  # We are on the master node, which holds the recovery payload
263
- document = serialize_document_for_create_recovery if operation == :create
264
- argv << MultiJson.dump([@instance.class.promiscuous_collection_name,
265
- @instance.id, operation, document, r, w])
268
+ argv << MultiJson.dump([self.class.name, operation, r, w, self.recovery_payload])
266
269
  end
267
270
 
271
+ # FIXME If the lock is lost, we need to backoff
272
+
268
273
  # We are going to store all the versions in redis, to be able to recover.
269
274
  # We store all our increments in a transaction_id key in JSON format.
270
275
  # Note that the transaction_id is the id of the current instance.
271
276
  @@increment_script ||= Promiscuous::Redis::Script.new <<-SCRIPT
272
277
  local prefix = ARGV[1] .. ':'
273
- local deps = cjson.decode(ARGV[2])
274
- local read_deps = deps[1]
275
- local write_deps = deps[2]
276
- local operation_recovery_key = prefix .. ARGV[3] .. ':operation_recovery'
278
+ local operation_recovery_key = ARGV[2]
279
+ local versions_recovery_key = operation_recovery_key .. ':versions'
280
+ local first_read_index = tonumber(ARGV[3]) + 1
277
281
  local operation_recovery_payload = ARGV[4]
282
+ local deps = KEYS
278
283
 
279
- local read_versions = {}
280
- local write_versions = {}
284
+ local versions = {}
281
285
 
282
- if redis.call('exists', operation_recovery_key) == 1 then
283
- for i, dep in ipairs(read_deps) do
284
- local key = prefix .. dep
285
- read_versions[i] = redis.call('get', key .. ':w')
286
- end
287
- for i, dep in ipairs(write_deps) do
288
- local key = prefix .. dep
289
- write_versions[i] = redis.call('get', key .. ':w')
290
- end
291
- else
292
- for i, dep in ipairs(read_deps) do
293
- local key = prefix .. dep
294
- redis.call('incr', key .. ':rw')
295
- read_versions[i] = redis.call('get', key .. ':w')
296
- redis.call('hset', operation_recovery_key, dep, read_versions[i])
286
+ if redis.call('exists', versions_recovery_key) == 1 then
287
+ first_read_index = tonumber(redis.call('hget', versions_recovery_key, 'read_index'))
288
+ if not first_read_index then
289
+ return redis.error_reply('Failed to read dependency index during recovery')
297
290
  end
298
291
 
299
- for i, dep in ipairs(write_deps) do
300
- local key = prefix .. dep
301
- write_versions[i] = redis.call('incr', key .. ':rw')
302
- redis.call('set', key .. ':w', write_versions[i])
303
- redis.call('hset', operation_recovery_key, dep, write_versions[i])
292
+ for i, dep in ipairs(deps) do
293
+ versions[i] = tonumber(redis.call('hget', versions_recovery_key, dep))
294
+ if not versions[i] then
295
+ return redis.error_reply('Failed to read dependency ' .. dep .. ' during recovery')
296
+ end
304
297
  end
305
298
 
306
- if operation_recovery_payload then
307
- redis.call('hset', operation_recovery_key, 'payload', operation_recovery_payload)
299
+ return { first_read_index-1, versions }
300
+ end
301
+
302
+ if redis.call('exists', prefix .. 'bootstrap') == 1 then
303
+ first_read_index = #deps + 1
304
+ end
305
+
306
+ if #deps ~= 0 then
307
+ redis.call('hset', versions_recovery_key, 'read_index', first_read_index)
308
+ end
309
+
310
+ for i, dep in ipairs(deps) do
311
+ local key = prefix .. dep
312
+ local rw_version = redis.call('incr', key .. ':rw')
313
+ if i < first_read_index then
314
+ redis.call('set', key .. ':w', rw_version)
315
+ versions[i] = rw_version
316
+ else
317
+ versions[i] = tonumber(redis.call('get', key .. ':w')) or 0
308
318
  end
319
+ redis.call('hset', versions_recovery_key, dep, versions[i])
309
320
  end
310
321
 
311
- return { read_versions, write_versions }
322
+ if operation_recovery_payload then
323
+ redis.call('set', operation_recovery_key, operation_recovery_payload)
324
+ end
325
+
326
+ return { first_read_index-1, versions }
312
327
  SCRIPT
313
- read_versions, write_versions = @@increment_script.eval(node, :argv => argv)
314
328
 
315
- r_deps.zip(read_versions).each { |dep, version| dep.version = version.to_i }
316
- w_deps.zip(write_versions).each { |dep, version| dep.version = version.to_i }
329
+ first_read_index, versions = @@increment_script.eval(node, :argv => argv, :keys => deps)
330
+
331
+ deps.zip(versions).each { |dep, version| dep.version = version }
332
+
333
+ @committed_write_deps += deps[0...first_read_index]
334
+ @committed_read_deps += deps[first_read_index..-1]
317
335
  end
318
336
 
319
- @committed_read_deps = r
320
- @committed_write_deps = w
321
- @instance_version = w.first.version
337
+ # The instance version must be the first in the list to allow atomic
338
+ # subscribers to do their magic.
339
+ # TODO What happens with transactions with multiple operations?
340
+ instance_dep_index = @committed_write_deps.index(write_dependencies.first)
341
+ @committed_write_deps[0], @committed_write_deps[instance_dep_index] =
342
+ @committed_write_deps[instance_dep_index], @committed_write_deps[0]
322
343
  end
323
344
 
324
- LOCK_OPTIONS = { :timeout => 10.seconds, # after 10 seconds, we give up
325
- :sleep => 0.01, # polling every 10ms.
326
- :expire => 1.minute } # after one minute, we are considered dead
327
-
328
345
  def self.lock_options
329
- LOCK_OPTIONS.merge({ :lock_set => Promiscuous::Key.new(:pub).join('lock_set').to_s })
346
+ {
347
+ :timeout => 10.seconds, # after 10 seconds, we give up so we don't queue requests
348
+ :sleep => 0.01.seconds, # polling every 10ms.
349
+ :expire => 1.minute, # after one minute, we are considered dead
350
+ :lock_set => Promiscuous::Key.new(:pub).join('lock_set').to_s
351
+ }
352
+ end
353
+ delegate :lock_options, :to => self
354
+
355
+ def dependency_for_op_lock
356
+ query_dependencies.first
357
+ end
358
+
359
+ def get_new_op_lock
360
+ dep = dependency_for_op_lock
361
+ Promiscuous::Redis::Mutex.new(dep.key(:pub).to_s, lock_options.merge(:node => dep.redis_node))
362
+ end
363
+
364
+ def self._acquire_lock(mutex)
365
+ loop do
366
+ case mutex.lock
367
+ # recover_operation_from_lock implicitly unlocks the lock.
368
+ when :recovered then recover_operation_from_lock(mutex)
369
+ when true then return true
370
+ when false then return false
371
+ end
372
+ end
373
+ end
374
+
375
+ def acquire_op_lock
376
+ @op_lock = get_new_op_lock
377
+
378
+ unless self.class._acquire_lock(@op_lock)
379
+ raise Promiscuous::Error::LockUnavailable.new(@op_lock.key)
380
+ end
381
+ end
382
+
383
+ def release_op_lock
384
+ @op_lock.unlock
385
+ @op_lock = nil
386
+ end
387
+
388
+ def ensure_op_still_locked
389
+ unless @op_lock.still_locked?
390
+ # We lost the lock, let the recovery mechanism do its thing.
391
+ raise Promiscuous::Error::LostLock.new(@op_lock.key)
392
+ end
330
393
  end
331
394
 
332
395
  def self.recover_locks
@@ -340,368 +403,110 @@ class Promiscuous::Publisher::Operation::Base
340
403
  break unless key && Time.now.to_i >= time.to_i + lock_options[:expire]
341
404
 
342
405
  mutex = Promiscuous::Redis::Mutex.new(key, lock_options.merge(:node => node))
343
- case mutex.lock
344
- when :recovered then recover_operation_from_lock(mutex)
345
- when true then mutex.unlock
346
- when false then ;
347
- end
406
+ mutex.unlock if _acquire_lock(mutex)
348
407
  end
349
408
  end
350
409
  end
410
+ register_recovery_mechanism :recover_locks
351
411
 
352
- def locks_from_write_dependencies
353
- # XXX TODO Support multi row writes
354
- instance_dep = write_dependencies.first
355
- return [] unless instance_dep
356
- options = self.class.lock_options.merge(:node => instance_dep.redis_node)
357
- [Promiscuous::Redis::Mutex.new(instance_dep.key(:pub).to_s, options)]
358
- end
359
-
360
- def lock_write_dependencies
361
- # returns true if we could get all the locks, false otherwise
362
-
363
- start_at = Time.now
364
- @recovered_locks = []
365
-
366
- # We acquire all the locks in order, and unlock everything if one come
367
- # to fail. lock/unlock return true/false when they succeed/fail
368
- locks = locks_from_write_dependencies
369
- locks.reduce(->{ @locks = locks; true }) do |chain, l|
370
- lambda do
371
- return false if Time.now - start_at > LOCK_OPTIONS[:timeout]
372
- case l.lock
373
- # Note that we do not unlock the recovered lock if the chain fails
374
- when :recovered then @recovered_locks << l; chain.call
375
- when true then chain.call or (l.unlock; false)
376
- when false then @unavailable_lock = l; false
377
- end
378
- end
379
- end.call
380
- end
381
-
382
- def unlock_write_dependencies
383
- # returns true if we could unlock all the locks, false otherwise
384
- return true if @locks.blank?
385
- @locks.reduce(true) { |result, l| l.unlock && result }.tap { @locks = nil }
386
- end
412
+ def dependencies_for(instance, options={})
413
+ return [] if instance.nil?
387
414
 
388
- def _reload_instance_dependencies
389
415
  if read?
390
416
  # We want to use the smallest subset that we can depend on when doing
391
417
  # reads. tracked_dependencies comes sorted from the smallest subset to
392
418
  # the largest. For maximum performance on the subscriber side, we thus
393
419
  # pick the first one. In most cases, it should resolve to the id
394
420
  # dependency.
395
- best_dependency = @instance.promiscuous.tracked_dependencies.first
396
- unless best_dependency
397
- raise Promiscuous::Error::Dependency.new(:operation => self)
398
- end
399
- [best_dependency]
421
+ # If we don't have any, the driver should track individual instances.
422
+ best_dependency = instance.promiscuous.tracked_dependencies(:allow_missing_attributes => true).first
423
+ [best_dependency].compact
400
424
  else
401
425
  # Note that tracked_dependencies will not return the id dependency if it
402
426
  # doesn't exist which can only happen for create operations and auto
403
- # generated ids. Be aware that with auto generated id, create operation
404
- # might not provide the id dependency.
405
- @instance.promiscuous.tracked_dependencies
427
+ # generated ids.
428
+ instance.promiscuous.tracked_dependencies
406
429
  end
407
430
  end
408
431
 
409
- def reload_instance_dependencies
410
- # Returns true when the dependencies changed, false otherwise
411
- @write_dependencies = nil
412
- old = @instance_dependencies
413
- @instance_dependencies = _reload_instance_dependencies
414
- old != @instance_dependencies
415
- end
416
-
417
- def instance_dependencies
418
- reload_instance_dependencies unless @instance_dependencies
419
- @instance_dependencies
420
- end
421
-
422
- def previous_successful_operations
423
- current_context.operations.reject(&:failed?)
424
- end
425
-
426
432
  def read_dependencies
427
433
  # We memoize the read dependencies not just for performance, but also
428
434
  # because we store the versions once incremented in these.
429
435
  return @read_dependencies if @read_dependencies
430
- read_dependencies = previous_successful_operations.select(&:read?)
431
- .map(&:instance_dependencies).flatten
436
+ read_dependencies = current_context.read_operations.map(&:query_dependencies).flatten
432
437
 
433
- # We implicitly have a read dependency on the latest write.
434
- if current_context.last_write_dependency
435
- current_context.last_write_dependency.version = nil
436
- read_dependencies << current_context.last_write_dependency
438
+ # We add extra_dependencies, which can contain the latest write, or user
439
+ # context, etc.
440
+ current_context.extra_dependencies.each do |dep|
441
+ dep.version = nil
442
+ read_dependencies << dep
437
443
  end
438
444
 
439
- @read_dependencies = read_dependencies.uniq
445
+ @read_dependencies = read_dependencies.uniq.each { |d| d.type = :read }
440
446
  end
441
- alias verify_read_dependencies read_dependencies
447
+ alias generate_read_dependencies read_dependencies
442
448
 
443
449
  def write_dependencies
444
- # The cache is cleared when we call reload_instance_dependencies
445
- @write_dependencies ||= previous_successful_operations.select(&:write?)
446
- .map(&:instance_dependencies).flatten.uniq
450
+ @write_dependencies ||= self.query_dependencies.uniq.each { |d| d.type = :write }
447
451
  end
448
452
 
449
- def reload_instance
450
- @instance = without_promiscuous { fetch_instance }
453
+ def should_instrument_query?
454
+ # current_context is later enforced for writes.
455
+ !Promiscuous.disabled? && (current_context || write?)
451
456
  end
452
457
 
453
- def perform_db_operation_with_no_exceptions(&db_operation)
454
- going_to_execute_db_operation
455
- @result = db_operation.call(self)
456
- rescue Exception => e
457
- @exception = e
458
- end
459
-
460
- def lock_instance_for_execute_persistent
461
- current_context.add_operation(self)
458
+ def execute(&query_config)
459
+ query = Promiscuous::Publisher::Operation::ProxyForQuery.new(self, &query_config)
462
460
 
463
- # Note: At first, @instance can be a representation of a selector, to
464
- # become a real model instance once we get to fetch it from the db with
465
- # reload_instance to lock an instance that matches the selector.
466
- # This is a good thing because we allow the underlying driver to hook from
467
- # the model interface to the driver interface easily.
468
- auto_unlock = true
469
-
470
- begin
471
- unless lock_write_dependencies
472
- raise Promiscuous::Error::LockUnavailable.new(@unavailable_lock.key)
473
- end
474
-
475
- if @recovered_locks.present?
476
- # When recovering locks, if we fail, we must not release the lock again
477
- # to allow another one to do the recovery.
478
- auto_unlock = false
479
- @recovered_locks.each { |lock| self.class.recover_operation_from_lock(lock) }
480
- auto_unlock = true
481
- raise TryAgain
482
- end
483
-
484
- if operation != :create
485
- # We need to lock and update all the dependencies before any other
486
- # readers can see our write through any one of our tracked attributes.
487
-
488
- # We want to reload the instance to make sure we have all the locked
489
- # dependencies that we need. It's a query we cannot avoid when we have
490
- # tracked dependencies. There is a bit of room for optimization.
491
- # If the selector doesn't fetch any instance, the query has no effect
492
- # so we can bypass it as if nothing happened. If reload_instance
493
- # raises an exception, it's okay to let it bubble up since we haven't
494
- # touch anything yet except for the locks (which will be unlocked on
495
- # the way out)
496
- return false unless reload_instance
497
-
498
- # If reload_instance changed the current instance because the selector,
499
- # we need to unlock the old instance, lock this new instance, and
500
- # retry. XXX What should we do if we are going in a live lock?
501
- # Sleep with some jitter?
502
- if reload_instance_dependencies
503
- raise TryAgain
504
- end
505
- end
506
- rescue TryAgain
507
- unlock_write_dependencies if auto_unlock
508
- retry
509
- end
510
-
511
- verify_read_dependencies
512
- if write_dependencies.blank?
513
- # TODO We don't like auto generated ids. A good solution is to do all
514
- # writes in a transaction, so we can know the ids at commit time.
515
- raise "We don't support auto generated id yet"
516
- end
517
-
518
- # We are now in the possession of an instance that matches the original
519
- # selector, we can proceed.
520
- auto_unlock = false
521
- true
522
- ensure
523
- # In case of an exception was raised before we updated the version in
524
- # redis, we can unlock because we don't need recovery.
525
- unlock_write_dependencies if auto_unlock
526
- end
527
-
528
- def execute_persistent_locked(&db_operation)
529
- # We are going to commit all the pending writes in the context if we are
530
- # doing a transaction commit. We also commit the current write operation for
531
- # atomic writes without transactions. We enable the recovery mechanism by
532
- # having someone expiring our lock if we die in the middle.
533
-
534
- # All the versions are updated and a marked as pending for publish in Redis
535
- # atomically in case we die before we could write the versions in the
536
- # database. Once incremented, concurrent queries that are reading our
537
- # instance will be serialized after our write, even through it may read our
538
- # old instance. This is a race that we tolerate.
539
- # XXX We also stash the document for create operations, so the recovery can
540
- # redo the create to avoid races when instances are getting partitioned.
541
- increment_read_and_write_dependencies(read_dependencies, write_dependencies)
542
-
543
- # From this point, if we die, the one expiring our write locks must finish
544
- # the publish, either by sending a dummy, or by sending the real instance.
545
- # We could have die before or after the database query.
546
-
547
- # We save the versions in the database, as it is our source of truth.
548
- # This allow a reconstruction of redis in the face of failures.
549
- # We would also need to send a special message to the subscribers to reset
550
- # their read counters to the last write version since we would not be able
551
- # to restore the read counters (and we don't want to store them because
552
- # this would dramatically augment our footprint on the db).
553
- #
554
- # If we are doing a destroy operation, and redis dies right after, and
555
- # we happen to lost contact with rabbitmq, recovery is going to be complex:
556
- # we would need to do a diff from the dummy subscriber to see what
557
- # documents are missing on our side to be able to resend the destroy
558
- # message.
559
-
560
- case operation
561
- when :create
562
- stash_version_in_write_query
563
- when :update
564
- stash_version_in_write_query
565
- # We are now in the possession of an instance that matches the original
566
- # selector. We need to make sure the db_operation will operate on it,
567
- # instead of the original selector.
568
- use_id_selector(:use_atomic_version_selector => true)
569
- # We need to use an atomic versioned selector to make sure that
570
- # if we lose the lock for a long period of time, we don't mess up
571
- # with other people's updates. Also we make sure that the recovery
572
- # mechanism is not racing with us.
573
- when :destroy
574
- use_id_selector(:use_atomic_version_selector => true)
575
- end
576
-
577
- # Perform the actual database query (single write or transaction commit).
578
- # If successful, the result goes in @result, otherwise, @exception contains
579
- # the thrown exception.
580
- perform_db_operation_with_no_exceptions(&db_operation)
581
-
582
- # We take a timestamp right after the write is performed because latency
583
- # measurements are performed on the subscriber.
584
- record_timestamp
585
-
586
- if operation == :update && !failed?
587
- # The underlying driver should implement some sort of find and modify
588
- # operation in the previous write query to avoid this extra read query.
589
- # If reload_instance raise an exception, we let it bubble up,
590
- # and we'll trigger the recovery mechanism.
591
- use_id_selector
592
- reload_instance
593
- end
594
-
595
- unless @locks.first.still_locked?
596
- # We lost the lock, let the recovery mechanism do its thing.
597
- # This is a code optimization to avoid checking if the db operation
598
- # succeeded or not because of the db operation race during recovery.
599
- raise Promiscuous::Error::LostLock.new(@locks.first.key)
461
+ if should_instrument_query?
462
+ raise Promiscuous::Error::MissingContext if !current_context && write?
463
+ execute_instrumented(query)
464
+ else
465
+ query.call_and_remember_result(:non_instrumented)
600
466
  end
601
467
 
602
- generate_payload_and_clear_operations
603
-
604
- # As soon as we unlock the locks, the rescuer will not be able to assume
605
- # that the database instance is still pristine, and so we need to stash the
606
- # payload in redis. If redis dies, we don't care because it can be
607
- # reconstructed. Subscribers can see "compressed" updates.
608
- publish_payload_in_redis
609
-
610
- # TODO Performance: merge these 3 redis operations to speed things up.
611
- unlock_write_dependencies
612
-
613
- # If we die from this point on, a recovery worker can republish our payload
614
- # since we queued it in Redis.
615
-
616
- # We don't care if we lost the lock and got recovered, subscribers are
617
- # immune to duplicate messages.
618
- publish_payload_in_rabbitmq_async
468
+ query.result
619
469
  end
620
470
 
621
- # --- the following methods can be overridden by the driver --- #
622
-
623
- def execute_persistent(&db_operation)
624
- return nil unless lock_instance_for_execute_persistent
625
- execute_persistent_locked(&db_operation)
471
+ def query_dependencies
472
+ # Returns the list of dependencies that are involved in the database query.
473
+ # For an atomic write operation, the first one returned must be the one
474
+ # corresponding to the primary key.
475
+ raise
626
476
  end
627
477
 
628
- def execute_non_persistent(&db_operation)
629
- # We are getting here in the following cases:
630
- # * read: we fetch the instance. It's the driver's job to cache the
631
- # raw instance and return it during db_operation.
632
- # * multi read: nothing to do, we'll keep our current selector, sadly
633
- # * write in a transaction: TODO
634
-
635
- if single?
636
- # If the query misses, we don't bother
637
- return nil unless reload_instance
638
- use_id_selector
639
- end
640
-
641
- # We don't do any reload_instance_dependencies at this point (and thus we
642
- # won't raise an exception on a multi read that we cannot track).
643
- # We'll wait until the commit, and hopefully with tainting, we'll be able to
644
- # tell if we should depend the multi read operation in question.
645
- perform_db_operation_with_no_exceptions(&db_operation)
646
- # If the db_operation raises, we don't consider this failed operation when
647
- # committing the next persistent write by omitting the operation in the
648
- # context.
649
- current_context.add_operation(self) unless failed?
650
- end
651
-
652
- def execute(&db_operation)
653
- # execute returns the result of the db_operation to perform
654
- db_operation ||= proc {}
655
- return db_operation.call if Promiscuous.disabled
656
-
657
- unless current_context
658
- raise Promiscuous::Error::MissingContext if write?
659
- return db_operation.call # Don't care for a read
660
- end
661
-
662
- self.persists? ? execute_persistent(&db_operation) :
663
- execute_non_persistent(&db_operation)
664
-
665
- @exception ? (raise @exception) : @result
478
+ def execute_instrumented(db_operation)
479
+ # Implemented by subclasses
480
+ raise
666
481
  end
667
482
 
668
- def fetch_instance
669
- # This method is overridden to use the original query selector.
670
- # Should return nil if the instance is not found.
671
- @instance
483
+ def operation_payloads
484
+ # subclass can use payloads_for to generate the payload
485
+ raise
672
486
  end
673
487
 
674
- def serialize_document_for_create_recovery
675
- # Overridden to be able to redo the create during recovery.
676
- nil
488
+ def recovery_payload
489
+ # Overridden to be able to recover the operation
490
+ []
677
491
  end
678
492
 
679
- def self.recover_operation(model, instance_id, document)
680
- # Overriden to reconstruct the operation. If the database is read, only the
681
- # primary must be used.
682
- new(:instance => model.new { |instance| instance.id = instance_id })
493
+ def self.recover_operation(*recovery_payload)
494
+ # Overridden to reconstruct the operation.
683
495
  end
684
496
 
685
497
  def recover_db_operation
686
- # Overriden to reexecute the db operation during recovery (or make sure that
498
+ # Overridden to reexecute the db operation during recovery (or make sure that
687
499
  # it will never succeed).
688
500
  end
689
501
 
690
- def use_id_selector(options={})
691
- # Overridden to use the {:id => @instance.id} selector.
692
- # if use_atomic_version_selector is passed, the driver must
693
- # add the VERSION_FIELD selector if present in original instance.
694
- end
695
-
696
- def use_versioned_selector
697
- # Overridden to use the {VERSION_FIELD => @instance[VERSION_FIELD]} selector.
698
- end
699
-
700
- def stash_version_in_write_query
701
- # Overridden to update the query to set 'instance.VERSION_FIELD = @instance_version'
502
+ def trace_operation
503
+ if ENV['TRACE']
504
+ msg = self.explain_operation(70)
505
+ current_context.trace(msg, :color => self.read? ? '0;32' : '1;31')
506
+ end
702
507
  end
703
508
 
704
- def going_to_execute_db_operation
705
- # Test hook
509
+ def explain_operation(max_width)
510
+ "Unknown database operation"
706
511
  end
707
512
  end