promiscuous 0.90.0 → 0.91.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/lib/promiscuous/amqp/bunny.rb +63 -36
  3. data/lib/promiscuous/amqp/fake.rb +3 -1
  4. data/lib/promiscuous/amqp/hot_bunnies.rb +26 -16
  5. data/lib/promiscuous/amqp/null.rb +1 -0
  6. data/lib/promiscuous/amqp.rb +12 -12
  7. data/lib/promiscuous/cli.rb +70 -29
  8. data/lib/promiscuous/config.rb +54 -29
  9. data/lib/promiscuous/convenience.rb +1 -1
  10. data/lib/promiscuous/dependency.rb +25 -6
  11. data/lib/promiscuous/error/connection.rb +11 -9
  12. data/lib/promiscuous/error/dependency.rb +8 -1
  13. data/lib/promiscuous/loader.rb +4 -2
  14. data/lib/promiscuous/publisher/bootstrap/connection.rb +25 -0
  15. data/lib/promiscuous/publisher/bootstrap/data.rb +127 -0
  16. data/lib/promiscuous/publisher/bootstrap/mode.rb +19 -0
  17. data/lib/promiscuous/publisher/bootstrap/status.rb +40 -0
  18. data/lib/promiscuous/publisher/bootstrap/version.rb +46 -0
  19. data/lib/promiscuous/publisher/bootstrap.rb +27 -0
  20. data/lib/promiscuous/publisher/context/base.rb +67 -0
  21. data/lib/promiscuous/{middleware.rb → publisher/context/middleware.rb} +16 -13
  22. data/lib/promiscuous/publisher/context/transaction.rb +36 -0
  23. data/lib/promiscuous/publisher/context.rb +4 -88
  24. data/lib/promiscuous/publisher/mock_generator.rb +9 -9
  25. data/lib/promiscuous/publisher/model/active_record.rb +7 -7
  26. data/lib/promiscuous/publisher/model/base.rb +29 -29
  27. data/lib/promiscuous/publisher/model/ephemeral.rb +5 -3
  28. data/lib/promiscuous/publisher/model/mock.rb +9 -5
  29. data/lib/promiscuous/publisher/model/mongoid.rb +5 -22
  30. data/lib/promiscuous/publisher/operation/active_record.rb +360 -0
  31. data/lib/promiscuous/publisher/operation/atomic.rb +167 -0
  32. data/lib/promiscuous/publisher/operation/base.rb +279 -474
  33. data/lib/promiscuous/publisher/operation/mongoid.rb +153 -145
  34. data/lib/promiscuous/publisher/operation/non_persistent.rb +28 -0
  35. data/lib/promiscuous/publisher/operation/proxy_for_query.rb +42 -0
  36. data/lib/promiscuous/publisher/operation/transaction.rb +85 -0
  37. data/lib/promiscuous/publisher/operation.rb +1 -1
  38. data/lib/promiscuous/publisher/worker.rb +7 -7
  39. data/lib/promiscuous/publisher.rb +1 -1
  40. data/lib/promiscuous/railtie.rb +20 -5
  41. data/lib/promiscuous/redis.rb +104 -56
  42. data/lib/promiscuous/subscriber/message_processor/base.rb +38 -0
  43. data/lib/promiscuous/subscriber/message_processor/bootstrap.rb +17 -0
  44. data/lib/promiscuous/subscriber/message_processor/regular.rb +192 -0
  45. data/lib/promiscuous/subscriber/message_processor.rb +4 -0
  46. data/lib/promiscuous/subscriber/model/base.rb +20 -15
  47. data/lib/promiscuous/subscriber/model/mongoid.rb +4 -4
  48. data/lib/promiscuous/subscriber/model/observer.rb +16 -2
  49. data/lib/promiscuous/subscriber/operation/base.rb +68 -0
  50. data/lib/promiscuous/subscriber/operation/bootstrap.rb +54 -0
  51. data/lib/promiscuous/subscriber/operation/regular.rb +13 -0
  52. data/lib/promiscuous/subscriber/operation.rb +3 -166
  53. data/lib/promiscuous/subscriber/worker/message.rb +61 -35
  54. data/lib/promiscuous/subscriber/worker/message_synchronizer.rb +90 -59
  55. data/lib/promiscuous/subscriber/worker/pump.rb +17 -5
  56. data/lib/promiscuous/subscriber/worker/recorder.rb +4 -1
  57. data/lib/promiscuous/subscriber/worker/runner.rb +49 -9
  58. data/lib/promiscuous/subscriber/worker/stats.rb +2 -2
  59. data/lib/promiscuous/subscriber/worker.rb +6 -0
  60. data/lib/promiscuous/subscriber.rb +1 -1
  61. data/lib/promiscuous/timer.rb +31 -18
  62. data/lib/promiscuous/version.rb +1 -1
  63. data/lib/promiscuous.rb +23 -3
  64. metadata +104 -89
  65. data/lib/promiscuous/subscriber/payload.rb +0 -34
@@ -1,41 +1,31 @@
1
1
  class Promiscuous::Publisher::Operation::Base
2
- class TryAgain < RuntimeError; end
3
- VERSION_FIELD = '_pv'
2
+ mattr_accessor :recovery_mechanisms
3
+ self.recovery_mechanisms = []
4
4
 
5
- attr_accessor :operation, :operation_ext, :instance, :selector_keys
6
-
7
- def initialize(options={})
8
- # XXX instance is not always an instance, it can be a selector
9
- # representation.
10
- @instance = options[:instance]
11
- @operation = options[:operation]
12
- @operation_ext = options[:operation_ext]
13
- @multi = options[:multi]
5
+ def self.register_recovery_mechanism(method_name=nil, &block)
6
+ self.recovery_mechanisms << (block || method(method_name))
14
7
  end
15
8
 
16
- def read?
17
- operation == :read
9
+ def self.run_recovery_mechanisms
10
+ self.recovery_mechanisms.each(&:call)
18
11
  end
19
12
 
20
- def write?
21
- !read?
22
- end
13
+ attr_accessor :operation
23
14
 
24
- def multi?
25
- !!@multi
15
+ def initialize(options={})
16
+ @operation = options[:operation]
26
17
  end
27
18
 
28
- def single?
29
- !@multi
19
+ def read?
20
+ @operation == :read
30
21
  end
31
22
 
32
- def persists?
33
- # TODO For writes in transactions, it should be false
34
- write?
23
+ def write?
24
+ !read?
35
25
  end
36
26
 
37
- def failed?
38
- !!@exception
27
+ def recovering?
28
+ !!@recovering
39
29
  end
40
30
 
41
31
  def current_context
@@ -67,7 +57,7 @@ class Promiscuous::Publisher::Operation::Base
67
57
  end
68
58
 
69
59
  def publish_payload_in_rabbitmq_async
70
- Promiscuous::AMQP.publish(:key => @amqp_key, :payload => @payload,
60
+ Promiscuous::AMQP.publish(:key => Promiscuous::Config.app, :payload => @payload,
71
61
  :on_confirm => method(:on_rabbitmq_confirm))
72
62
  end
73
63
 
@@ -87,33 +77,37 @@ class Promiscuous::Publisher::Operation::Base
87
77
  node.zadd(rabbitmq_staging_set_key, Time.now.to_i, key)
88
78
  payload = node.get(key)
89
79
 
90
- Promiscuous.info "[payload recovery] #{payload}"
91
- new.instance_eval do
92
- @payload_recovery_node = node
93
- @payload_recovery_key = key
94
- @amqp_key = MultiJson.load(payload)['__amqp__']
95
- @payload = payload
96
- publish_payload_in_rabbitmq_async
80
+ # It's possible that the payload is nil as the message could be
81
+ # recovered by another worker
82
+ if payload
83
+ Promiscuous.info "[payload recovery] #{payload}"
84
+ new.instance_eval do
85
+ @payload_recovery_node = node
86
+ @payload_recovery_key = key
87
+ @payload = payload
88
+ @recovery = true
89
+ publish_payload_in_rabbitmq_async
90
+ end
97
91
  end
98
92
  end
99
93
  end
100
94
  end
95
+ register_recovery_mechanism :recover_payloads_for_rabbitmq
101
96
 
102
97
  def publish_payload_in_redis
103
98
  # TODO Optimize and DRY this up
104
99
  r = @committed_read_deps
105
100
  w = @committed_write_deps
106
101
 
107
- master_node = w.first.redis_node
108
- operation_recovery_key = w.first.key(:pub).join('operation_recovery').to_s
109
- # We identify a payload with a unique key (id:id_value:current_version) to
110
- # avoid collisions with other updates on the same document.
102
+ # We identify a payload with a unique key (id:id_value:current_version:payload_recovery)
103
+ # to avoid collisions with other updates on the same document.
104
+ master_node = @op_lock.node
111
105
  @payload_recovery_node = master_node
112
- @payload_recovery_key = w.first.key(:pub).join(w.first.version).to_s
106
+ @payload_recovery_key = Promiscuous::Key.new(:pub).join('payload_recovery', @op_lock.token).to_s
113
107
 
114
108
  # We need to be able to recover from a redis failure. By sending the
115
109
  # payload to the slave first, we ensure that we can replay the lost
116
- # payloads if the primary came to fail.
110
+ # payloads if the master came to fail.
117
111
  # We still need to recover the lost operations. This can be done by doing a
118
112
  # version diff from what is stored in the database and the recovered redis slave.
119
113
  # XXX TODO
@@ -123,113 +117,108 @@ class Promiscuous::Publisher::Operation::Base
123
117
  # happen if we lost the lock without knowing about it.
124
118
  # The payload can be sent twice, which is okay since the subscribers
125
119
  # tolerate it.
120
+ operation_recovery_key = "#{@op_lock.key}:operation_recovery"
121
+ versions_recovery_key = "#{operation_recovery_key}:versions"
126
122
 
127
- nodes = (w+r).map(&:redis_node).uniq
128
- if nodes.size == 1
129
- # We just have the master node. Since we are atomic, we don't need to do
130
- # the 2pc dance.
131
- master_node.multi do
132
- master_node.del(operation_recovery_key)
133
- master_node.set(@payload_recovery_key, @payload)
134
- master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
135
- end
136
- else
137
- master_node.multi do
138
- master_node.set(@payload_recovery_key, @payload)
139
- master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
140
- end
141
-
142
- # The payload is safe now. We can cleanup all the versions on the
143
- # secondary. Note that we need to clear the master node at the end,
144
- # as it acts as a lock on the other keys. This is important to avoid a
145
- # race where we would delete data that doesn't belong to the current
146
- # operation due to a lock loss.
147
- nodes.reject { |node| node == master_node }
148
- .each { |node| node.del(operation_recovery_key) }
123
+ master_node.multi do
124
+ master_node.set(@payload_recovery_key, @payload)
125
+ master_node.zadd(rabbitmq_staging_set_key, Time.now.to_i, @payload_recovery_key)
149
126
  master_node.del(operation_recovery_key)
127
+ master_node.del(versions_recovery_key)
150
128
  end
129
+
130
+ # The payload is safe now. We can cleanup all the versions on the
131
+ # secondary. There are no harmful races that can happen since the
132
+ # secondary_operation_recovery_key is unique to the operation.
133
+ # XXX The caveat is that if we die here, the
134
+ # secondary_operation_recovery_key will never be cleaned up.
135
+ (w+r).map(&:redis_node).uniq
136
+ .reject { |node| node == master_node }
137
+ .each { |node| node.del(versions_recovery_key) }
151
138
  end
152
139
 
153
- def generate_payload_and_clear_operations
154
- # TODO Transactions with multi writes
155
- raise "We don't support multi writes yet" if previous_successful_operations.select(&:write?).size > 1
156
- raise "The instance is gone, or there is a version mismatch" unless @instance
140
+ def payload_for(instance)
141
+ options = { :with_attributes => self.operation.in?([:create, :update]) }
142
+ instance.promiscuous.payload(options).tap do |payload|
143
+ payload[:operation] = self.operation
144
+ end
145
+ end
157
146
 
158
- payload = @instance.promiscuous.payload(:with_attributes => operation.in?([:create, :update]))
147
+ def generate_payload
148
+ payload = {}
149
+ payload[:operations] = operation_payloads
159
150
  payload[:context] = current_context.name
151
+ payload[:app] = Promiscuous::Config.app
160
152
  payload[:timestamp] = @timestamp
161
-
162
- # If the db operation has failed, so we publish a dummy operation on the
163
- # failed instance. It's better than using the Dummy polisher class
164
- # because a subscriber can choose not to receive any of these messages.
165
- payload[:operation] = self.failed? ? :dummy : operation
166
-
167
- # We need to consider the last write operation as an implicit read
168
- # dependency. This is why we don't need to consider the read dependencies
169
- # happening before a first write when publishing the second write in a
170
- # context.
153
+ payload[:host] = Socket.gethostname
154
+ payload[:current_user_id] = Thread.current[:promiscuous_context].try(:current_user_id)
171
155
  payload[:dependencies] = {}
172
156
  payload[:dependencies][:read] = @committed_read_deps if @committed_read_deps.present?
173
157
  payload[:dependencies][:write] = @committed_write_deps
174
158
 
175
- current_context.last_write_dependency = @committed_write_deps.first
176
- current_context.operations.clear
177
-
178
- @amqp_key = payload[:__amqp__]
179
159
  @payload = MultiJson.dump(payload)
180
160
  end
181
161
 
162
+ def clear_previous_dependencies
163
+ current_context.read_operations.clear
164
+ current_context.extra_dependencies = [@committed_write_deps.first]
165
+ end
166
+
182
167
  def self.recover_operation_from_lock(lock)
183
168
  # We happen to have acquired a never released lock.
184
169
  # The database instance is thus still pristine.
185
- # Three cases to consider:
186
- # 1) the key is not an id dependency or the payload queue stage was passed
187
- # 2) The write query was never executed, we must send a dummy operation
188
- # 3) The write query was executed, but never passed the payload queue stage
189
170
 
190
171
  master_node = lock.node
191
- recovery_data = master_node.hgetall("#{lock.key}:operation_recovery")
192
- return nil unless recovery_data.present? # case 1)
172
+ recovery_data = master_node.get("#{lock.key}:operation_recovery")
173
+
174
+ unless recovery_data.present?
175
+ lock.unlock
176
+ return
177
+ end
193
178
 
194
179
  Promiscuous.info "[operation recovery] #{lock.key} -> #{recovery_data}"
195
180
 
196
- collection, instance_id, operation,
197
- document, read_dependencies, write_dependencies = *MultiJson.load(recovery_data['payload'])
181
+ op_klass, operation, read_dependencies,
182
+ write_dependencies, recovery_arguments = *MultiJson.load(recovery_data)
198
183
 
199
184
  operation = operation.to_sym
200
- read_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s) }
201
- write_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s) }
185
+ read_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s, :type => :read) }
186
+ write_dependencies.map! { |k| Promiscuous::Dependency.parse(k.to_s, :type => :write) }
202
187
 
203
- model = Promiscuous::Publisher::Model.publishers[collection]
204
-
205
- if model.is_a? Promiscuous::Publisher::Model::Ephemeral
206
- operation = :dummy
207
- else
208
- # TODO Abstract db operations.
209
- # We need to query on the root model
210
- model = model.collection.name.singularize.camelize.constantize
188
+ begin
189
+ op = op_klass.constantize.recover_operation(*recovery_arguments)
190
+ rescue NameError
191
+ raise "invalid recover operation class: #{op_klass}"
211
192
  end
212
193
 
213
- op_klass = model.get_operation_class_for(operation)
214
- op = op_klass.recover_operation(model, instance_id, document)
215
- op.operation = operation
216
-
217
- Promiscuous.context :operation_recovery, :detached_from_parent => true do
218
- op.instance_eval do
219
- @read_dependencies = read_dependencies
220
- @write_dependencies = write_dependencies
221
- @locks = [lock]
222
- execute_persistent_locked { recover_db_operation }
194
+ Thread.new do
195
+ # We run the recovery in another thread to ensure that we get a new
196
+ # database connection to avoid tampering with the current state of the
197
+ # connection, which can be in an open transaction.
198
+ # Thankfully, we are not in a fast path.
199
+ # Note that any exceptions will be passed through the thread join() method.
200
+ Promiscuous.context :operation_recovery do
201
+ op.instance_eval do
202
+ @operation = operation
203
+ @read_dependencies = read_dependencies
204
+ @write_dependencies = write_dependencies
205
+ @op_lock = lock
206
+ @recovering = true
207
+
208
+ query = Promiscuous::Publisher::Operation::ProxyForQuery.new(self) { recover_db_operation }
209
+ execute_instrumented(query)
210
+ query.result
211
+ end
223
212
  end
224
- end
213
+ end.join
225
214
 
226
- lock.unlock
227
215
  rescue Exception => e
228
- message = "cannot recover #{lock.key} -> #{recovery_data}"
216
+ message = "cannot recover #{lock.key}, failed to fetch recovery data"
217
+ message = "cannot recover #{lock.key}, recovery data: #{recovery_data}" if recovery_data
229
218
  raise Promiscuous::Error::Recovery.new(message, e)
230
219
  end
231
220
 
232
- def increment_read_and_write_dependencies(read_dependencies, write_dependencies)
221
+ def increment_read_and_write_dependencies
233
222
  # We collapse all operations, ignoring the read/write interleaving.
234
223
  # It doesn't matter since all write operations are serialized, so the first
235
224
  # write in the transaction can have all the read dependencies.
@@ -241,92 +230,166 @@ class Promiscuous::Publisher::Operation::Base
241
230
  # r and w is empty) when it calculates the happens before relationships.
242
231
  r -= w
243
232
 
244
- master_node = w.first.redis_node
245
- operation_recovery_key = w.first
233
+ master_node = @op_lock.node
234
+ operation_recovery_key = "#{@op_lock.key}:operation_recovery"
246
235
 
247
236
  # We group all the dependencies by their respective shards
248
237
  # The master node will have the responsibility to hold the recovery data.
249
238
  # We do the master node first. The secondaries can be done in parallel.
250
- (w+r).group_by(&:redis_node).each do |node, deps|
251
- r_deps = deps.select { |dep| dep.in? r }
252
- w_deps = deps.select { |dep| dep.in? w }
239
+ @committed_read_deps = []
240
+ @committed_write_deps = []
241
+
242
+ # We need to do the increments always in the same node order, otherwise
243
+ # the subscriber can deadlock. But we must always put the recovery payload
244
+ # on the master before touching anything.
245
+ nodes_deps = (w+r).group_by(&:redis_node)
246
+ .sort_by { |node, deps| -Promiscuous::Redis.master.nodes.index(node) }
247
+ if nodes_deps.first[0] != master_node
248
+ nodes_deps = [[master_node, []]] + nodes_deps
249
+ end
253
250
 
251
+ nodes_deps.each do |node, deps|
254
252
  argv = []
255
253
  argv << Promiscuous::Key.new(:pub) # key prefixes
256
- argv << MultiJson.dump([r_deps, w_deps])
254
+ argv << operation_recovery_key
255
+
256
+ # The index of the first write is then used to pass to redis along with the
257
+ # dependencies. This is done because arguments to redis LUA scripts cannot
258
+ # accept complex data types.
259
+ argv << (deps.index(&:read?) || deps.length)
257
260
 
258
261
  # Each shard has its own recovery payload. The master recovery node
259
262
  # has the full operation recovery, and the others just have their versions.
260
- argv << operation_recovery_key.as_json
261
- if node == master_node
263
+ # Note that the operation_recovery_key on the secondaries have the current
264
+ # version of the instance appended to them. It's easier to cleanup when
265
+ # locks get lost.
266
+ if node == master_node && !self.recovering?
262
267
  # We are on the master node, which holds the recovery payload
263
- document = serialize_document_for_create_recovery if operation == :create
264
- argv << MultiJson.dump([@instance.class.promiscuous_collection_name,
265
- @instance.id, operation, document, r, w])
268
+ argv << MultiJson.dump([self.class.name, operation, r, w, self.recovery_payload])
266
269
  end
267
270
 
271
+ # FIXME If the lock is lost, we need to backoff
272
+
268
273
  # We are going to store all the versions in redis, to be able to recover.
269
274
  # We store all our increments in a transaction_id key in JSON format.
270
275
  # Note that the transaction_id is the id of the current instance.
271
276
  @@increment_script ||= Promiscuous::Redis::Script.new <<-SCRIPT
272
277
  local prefix = ARGV[1] .. ':'
273
- local deps = cjson.decode(ARGV[2])
274
- local read_deps = deps[1]
275
- local write_deps = deps[2]
276
- local operation_recovery_key = prefix .. ARGV[3] .. ':operation_recovery'
278
+ local operation_recovery_key = ARGV[2]
279
+ local versions_recovery_key = operation_recovery_key .. ':versions'
280
+ local first_read_index = tonumber(ARGV[3]) + 1
277
281
  local operation_recovery_payload = ARGV[4]
282
+ local deps = KEYS
278
283
 
279
- local read_versions = {}
280
- local write_versions = {}
284
+ local versions = {}
281
285
 
282
- if redis.call('exists', operation_recovery_key) == 1 then
283
- for i, dep in ipairs(read_deps) do
284
- local key = prefix .. dep
285
- read_versions[i] = redis.call('get', key .. ':w')
286
- end
287
- for i, dep in ipairs(write_deps) do
288
- local key = prefix .. dep
289
- write_versions[i] = redis.call('get', key .. ':w')
290
- end
291
- else
292
- for i, dep in ipairs(read_deps) do
293
- local key = prefix .. dep
294
- redis.call('incr', key .. ':rw')
295
- read_versions[i] = redis.call('get', key .. ':w')
296
- redis.call('hset', operation_recovery_key, dep, read_versions[i])
286
+ if redis.call('exists', versions_recovery_key) == 1 then
287
+ first_read_index = tonumber(redis.call('hget', versions_recovery_key, 'read_index'))
288
+ if not first_read_index then
289
+ return redis.error_reply('Failed to read dependency index during recovery')
297
290
  end
298
291
 
299
- for i, dep in ipairs(write_deps) do
300
- local key = prefix .. dep
301
- write_versions[i] = redis.call('incr', key .. ':rw')
302
- redis.call('set', key .. ':w', write_versions[i])
303
- redis.call('hset', operation_recovery_key, dep, write_versions[i])
292
+ for i, dep in ipairs(deps) do
293
+ versions[i] = tonumber(redis.call('hget', versions_recovery_key, dep))
294
+ if not versions[i] then
295
+ return redis.error_reply('Failed to read dependency ' .. dep .. ' during recovery')
296
+ end
304
297
  end
305
298
 
306
- if operation_recovery_payload then
307
- redis.call('hset', operation_recovery_key, 'payload', operation_recovery_payload)
299
+ return { first_read_index-1, versions }
300
+ end
301
+
302
+ if redis.call('exists', prefix .. 'bootstrap') == 1 then
303
+ first_read_index = #deps + 1
304
+ end
305
+
306
+ if #deps ~= 0 then
307
+ redis.call('hset', versions_recovery_key, 'read_index', first_read_index)
308
+ end
309
+
310
+ for i, dep in ipairs(deps) do
311
+ local key = prefix .. dep
312
+ local rw_version = redis.call('incr', key .. ':rw')
313
+ if i < first_read_index then
314
+ redis.call('set', key .. ':w', rw_version)
315
+ versions[i] = rw_version
316
+ else
317
+ versions[i] = tonumber(redis.call('get', key .. ':w')) or 0
308
318
  end
319
+ redis.call('hset', versions_recovery_key, dep, versions[i])
309
320
  end
310
321
 
311
- return { read_versions, write_versions }
322
+ if operation_recovery_payload then
323
+ redis.call('set', operation_recovery_key, operation_recovery_payload)
324
+ end
325
+
326
+ return { first_read_index-1, versions }
312
327
  SCRIPT
313
- read_versions, write_versions = @@increment_script.eval(node, :argv => argv)
314
328
 
315
- r_deps.zip(read_versions).each { |dep, version| dep.version = version.to_i }
316
- w_deps.zip(write_versions).each { |dep, version| dep.version = version.to_i }
329
+ first_read_index, versions = @@increment_script.eval(node, :argv => argv, :keys => deps)
330
+
331
+ deps.zip(versions).each { |dep, version| dep.version = version }
332
+
333
+ @committed_write_deps += deps[0...first_read_index]
334
+ @committed_read_deps += deps[first_read_index..-1]
317
335
  end
318
336
 
319
- @committed_read_deps = r
320
- @committed_write_deps = w
321
- @instance_version = w.first.version
337
+ # The instance version must be the first in the list to allow atomic
338
+ # subscribers to do their magic.
339
+ # TODO What happens with transactions with multiple operations?
340
+ instance_dep_index = @committed_write_deps.index(write_dependencies.first)
341
+ @committed_write_deps[0], @committed_write_deps[instance_dep_index] =
342
+ @committed_write_deps[instance_dep_index], @committed_write_deps[0]
322
343
  end
323
344
 
324
- LOCK_OPTIONS = { :timeout => 10.seconds, # after 10 seconds, we give up
325
- :sleep => 0.01, # polling every 10ms.
326
- :expire => 1.minute } # after one minute, we are considered dead
327
-
328
345
  def self.lock_options
329
- LOCK_OPTIONS.merge({ :lock_set => Promiscuous::Key.new(:pub).join('lock_set').to_s })
346
+ {
347
+ :timeout => 10.seconds, # after 10 seconds, we give up so we don't queue requests
348
+ :sleep => 0.01.seconds, # polling every 10ms.
349
+ :expire => 1.minute, # after one minute, we are considered dead
350
+ :lock_set => Promiscuous::Key.new(:pub).join('lock_set').to_s
351
+ }
352
+ end
353
+ delegate :lock_options, :to => self
354
+
355
+ def dependency_for_op_lock
356
+ query_dependencies.first
357
+ end
358
+
359
+ def get_new_op_lock
360
+ dep = dependency_for_op_lock
361
+ Promiscuous::Redis::Mutex.new(dep.key(:pub).to_s, lock_options.merge(:node => dep.redis_node))
362
+ end
363
+
364
+ def self._acquire_lock(mutex)
365
+ loop do
366
+ case mutex.lock
367
+ # recover_operation_from_lock implicitly unlocks the lock.
368
+ when :recovered then recover_operation_from_lock(mutex)
369
+ when true then return true
370
+ when false then return false
371
+ end
372
+ end
373
+ end
374
+
375
+ def acquire_op_lock
376
+ @op_lock = get_new_op_lock
377
+
378
+ unless self.class._acquire_lock(@op_lock)
379
+ raise Promiscuous::Error::LockUnavailable.new(@op_lock.key)
380
+ end
381
+ end
382
+
383
+ def release_op_lock
384
+ @op_lock.unlock
385
+ @op_lock = nil
386
+ end
387
+
388
+ def ensure_op_still_locked
389
+ unless @op_lock.still_locked?
390
+ # We lost the lock, let the recovery mechanism do its thing.
391
+ raise Promiscuous::Error::LostLock.new(@op_lock.key)
392
+ end
330
393
  end
331
394
 
332
395
  def self.recover_locks
@@ -340,368 +403,110 @@ class Promiscuous::Publisher::Operation::Base
340
403
  break unless key && Time.now.to_i >= time.to_i + lock_options[:expire]
341
404
 
342
405
  mutex = Promiscuous::Redis::Mutex.new(key, lock_options.merge(:node => node))
343
- case mutex.lock
344
- when :recovered then recover_operation_from_lock(mutex)
345
- when true then mutex.unlock
346
- when false then ;
347
- end
406
+ mutex.unlock if _acquire_lock(mutex)
348
407
  end
349
408
  end
350
409
  end
410
+ register_recovery_mechanism :recover_locks
351
411
 
352
- def locks_from_write_dependencies
353
- # XXX TODO Support multi row writes
354
- instance_dep = write_dependencies.first
355
- return [] unless instance_dep
356
- options = self.class.lock_options.merge(:node => instance_dep.redis_node)
357
- [Promiscuous::Redis::Mutex.new(instance_dep.key(:pub).to_s, options)]
358
- end
359
-
360
- def lock_write_dependencies
361
- # returns true if we could get all the locks, false otherwise
362
-
363
- start_at = Time.now
364
- @recovered_locks = []
365
-
366
- # We acquire all the locks in order, and unlock everything if one come
367
- # to fail. lock/unlock return true/false when they succeed/fail
368
- locks = locks_from_write_dependencies
369
- locks.reduce(->{ @locks = locks; true }) do |chain, l|
370
- lambda do
371
- return false if Time.now - start_at > LOCK_OPTIONS[:timeout]
372
- case l.lock
373
- # Note that we do not unlock the recovered lock if the chain fails
374
- when :recovered then @recovered_locks << l; chain.call
375
- when true then chain.call or (l.unlock; false)
376
- when false then @unavailable_lock = l; false
377
- end
378
- end
379
- end.call
380
- end
381
-
382
- def unlock_write_dependencies
383
- # returns true if we could unlock all the locks, false otherwise
384
- return true if @locks.blank?
385
- @locks.reduce(true) { |result, l| l.unlock && result }.tap { @locks = nil }
386
- end
412
+ def dependencies_for(instance, options={})
413
+ return [] if instance.nil?
387
414
 
388
- def _reload_instance_dependencies
389
415
  if read?
390
416
  # We want to use the smallest subset that we can depend on when doing
391
417
  # reads. tracked_dependencies comes sorted from the smallest subset to
392
418
  # the largest. For maximum performance on the subscriber side, we thus
393
419
  # pick the first one. In most cases, it should resolve to the id
394
420
  # dependency.
395
- best_dependency = @instance.promiscuous.tracked_dependencies.first
396
- unless best_dependency
397
- raise Promiscuous::Error::Dependency.new(:operation => self)
398
- end
399
- [best_dependency]
421
+ # If we don't have any, the driver should track individual instances.
422
+ best_dependency = instance.promiscuous.tracked_dependencies(:allow_missing_attributes => true).first
423
+ [best_dependency].compact
400
424
  else
401
425
  # Note that tracked_dependencies will not return the id dependency if it
402
426
  # doesn't exist which can only happen for create operations and auto
403
- # generated ids. Be aware that with auto generated id, create operation
404
- # might not provide the id dependency.
405
- @instance.promiscuous.tracked_dependencies
427
+ # generated ids.
428
+ instance.promiscuous.tracked_dependencies
406
429
  end
407
430
  end
408
431
 
409
- def reload_instance_dependencies
410
- # Returns true when the dependencies changed, false otherwise
411
- @write_dependencies = nil
412
- old = @instance_dependencies
413
- @instance_dependencies = _reload_instance_dependencies
414
- old != @instance_dependencies
415
- end
416
-
417
- def instance_dependencies
418
- reload_instance_dependencies unless @instance_dependencies
419
- @instance_dependencies
420
- end
421
-
422
- def previous_successful_operations
423
- current_context.operations.reject(&:failed?)
424
- end
425
-
426
432
  def read_dependencies
427
433
  # We memoize the read dependencies not just for performance, but also
428
434
  # because we store the versions once incremented in these.
429
435
  return @read_dependencies if @read_dependencies
430
- read_dependencies = previous_successful_operations.select(&:read?)
431
- .map(&:instance_dependencies).flatten
436
+ read_dependencies = current_context.read_operations.map(&:query_dependencies).flatten
432
437
 
433
- # We implicitly have a read dependency on the latest write.
434
- if current_context.last_write_dependency
435
- current_context.last_write_dependency.version = nil
436
- read_dependencies << current_context.last_write_dependency
438
+ # We add extra_dependencies, which can contain the latest write, or user
439
+ # context, etc.
440
+ current_context.extra_dependencies.each do |dep|
441
+ dep.version = nil
442
+ read_dependencies << dep
437
443
  end
438
444
 
439
- @read_dependencies = read_dependencies.uniq
445
+ @read_dependencies = read_dependencies.uniq.each { |d| d.type = :read }
440
446
  end
441
- alias verify_read_dependencies read_dependencies
447
+ alias generate_read_dependencies read_dependencies
442
448
 
443
449
  def write_dependencies
444
- # The cache is cleared when we call reload_instance_dependencies
445
- @write_dependencies ||= previous_successful_operations.select(&:write?)
446
- .map(&:instance_dependencies).flatten.uniq
450
+ @write_dependencies ||= self.query_dependencies.uniq.each { |d| d.type = :write }
447
451
  end
448
452
 
449
- def reload_instance
450
- @instance = without_promiscuous { fetch_instance }
453
+ def should_instrument_query?
454
+ # current_context is later enforced for writes.
455
+ !Promiscuous.disabled? && (current_context || write?)
451
456
  end
452
457
 
453
- def perform_db_operation_with_no_exceptions(&db_operation)
454
- going_to_execute_db_operation
455
- @result = db_operation.call(self)
456
- rescue Exception => e
457
- @exception = e
458
- end
459
-
460
- def lock_instance_for_execute_persistent
461
- current_context.add_operation(self)
458
+ def execute(&query_config)
459
+ query = Promiscuous::Publisher::Operation::ProxyForQuery.new(self, &query_config)
462
460
 
463
- # Note: At first, @instance can be a representation of a selector, to
464
- # become a real model instance once we get to fetch it from the db with
465
- # reload_instance to lock an instance that matches the selector.
466
- # This is a good thing because we allow the underlying driver to hook from
467
- # the model interface to the driver interface easily.
468
- auto_unlock = true
469
-
470
- begin
471
- unless lock_write_dependencies
472
- raise Promiscuous::Error::LockUnavailable.new(@unavailable_lock.key)
473
- end
474
-
475
- if @recovered_locks.present?
476
- # When recovering locks, if we fail, we must not release the lock again
477
- # to allow another one to do the recovery.
478
- auto_unlock = false
479
- @recovered_locks.each { |lock| self.class.recover_operation_from_lock(lock) }
480
- auto_unlock = true
481
- raise TryAgain
482
- end
483
-
484
- if operation != :create
485
- # We need to lock and update all the dependencies before any other
486
- # readers can see our write through any one of our tracked attributes.
487
-
488
- # We want to reload the instance to make sure we have all the locked
489
- # dependencies that we need. It's a query we cannot avoid when we have
490
- # tracked dependencies. There is a bit of room for optimization.
491
- # If the selector doesn't fetch any instance, the query has no effect
492
- # so we can bypass it as if nothing happened. If reload_instance
493
- # raises an exception, it's okay to let it bubble up since we haven't
494
- # touch anything yet except for the locks (which will be unlocked on
495
- # the way out)
496
- return false unless reload_instance
497
-
498
- # If reload_instance changed the current instance because the selector,
499
- # we need to unlock the old instance, lock this new instance, and
500
- # retry. XXX What should we do if we are going in a live lock?
501
- # Sleep with some jitter?
502
- if reload_instance_dependencies
503
- raise TryAgain
504
- end
505
- end
506
- rescue TryAgain
507
- unlock_write_dependencies if auto_unlock
508
- retry
509
- end
510
-
511
- verify_read_dependencies
512
- if write_dependencies.blank?
513
- # TODO We don't like auto generated ids. A good solution is to do all
514
- # writes in a transaction, so we can know the ids at commit time.
515
- raise "We don't support auto generated id yet"
516
- end
517
-
518
- # We are now in the possession of an instance that matches the original
519
- # selector, we can proceed.
520
- auto_unlock = false
521
- true
522
- ensure
523
- # In case of an exception was raised before we updated the version in
524
- # redis, we can unlock because we don't need recovery.
525
- unlock_write_dependencies if auto_unlock
526
- end
527
-
528
- def execute_persistent_locked(&db_operation)
529
- # We are going to commit all the pending writes in the context if we are
530
- # doing a transaction commit. We also commit the current write operation for
531
- # atomic writes without transactions. We enable the recovery mechanism by
532
- # having someone expiring our lock if we die in the middle.
533
-
534
- # All the versions are updated and a marked as pending for publish in Redis
535
- # atomically in case we die before we could write the versions in the
536
- # database. Once incremented, concurrent queries that are reading our
537
- # instance will be serialized after our write, even through it may read our
538
- # old instance. This is a race that we tolerate.
539
- # XXX We also stash the document for create operations, so the recovery can
540
- # redo the create to avoid races when instances are getting partitioned.
541
- increment_read_and_write_dependencies(read_dependencies, write_dependencies)
542
-
543
- # From this point, if we die, the one expiring our write locks must finish
544
- # the publish, either by sending a dummy, or by sending the real instance.
545
- # We could have die before or after the database query.
546
-
547
- # We save the versions in the database, as it is our source of truth.
548
- # This allow a reconstruction of redis in the face of failures.
549
- # We would also need to send a special message to the subscribers to reset
550
- # their read counters to the last write version since we would not be able
551
- # to restore the read counters (and we don't want to store them because
552
- # this would dramatically augment our footprint on the db).
553
- #
554
- # If we are doing a destroy operation, and redis dies right after, and
555
- # we happen to lost contact with rabbitmq, recovery is going to be complex:
556
- # we would need to do a diff from the dummy subscriber to see what
557
- # documents are missing on our side to be able to resend the destroy
558
- # message.
559
-
560
- case operation
561
- when :create
562
- stash_version_in_write_query
563
- when :update
564
- stash_version_in_write_query
565
- # We are now in the possession of an instance that matches the original
566
- # selector. We need to make sure the db_operation will operate on it,
567
- # instead of the original selector.
568
- use_id_selector(:use_atomic_version_selector => true)
569
- # We need to use an atomic versioned selector to make sure that
570
- # if we lose the lock for a long period of time, we don't mess up
571
- # with other people's updates. Also we make sure that the recovery
572
- # mechanism is not racing with us.
573
- when :destroy
574
- use_id_selector(:use_atomic_version_selector => true)
575
- end
576
-
577
- # Perform the actual database query (single write or transaction commit).
578
- # If successful, the result goes in @result, otherwise, @exception contains
579
- # the thrown exception.
580
- perform_db_operation_with_no_exceptions(&db_operation)
581
-
582
- # We take a timestamp right after the write is performed because latency
583
- # measurements are performed on the subscriber.
584
- record_timestamp
585
-
586
- if operation == :update && !failed?
587
- # The underlying driver should implement some sort of find and modify
588
- # operation in the previous write query to avoid this extra read query.
589
- # If reload_instance raise an exception, we let it bubble up,
590
- # and we'll trigger the recovery mechanism.
591
- use_id_selector
592
- reload_instance
593
- end
594
-
595
- unless @locks.first.still_locked?
596
- # We lost the lock, let the recovery mechanism do its thing.
597
- # This is a code optimization to avoid checking if the db operation
598
- # succeeded or not because of the db operation race during recovery.
599
- raise Promiscuous::Error::LostLock.new(@locks.first.key)
461
+ if should_instrument_query?
462
+ raise Promiscuous::Error::MissingContext if !current_context && write?
463
+ execute_instrumented(query)
464
+ else
465
+ query.call_and_remember_result(:non_instrumented)
600
466
  end
601
467
 
602
- generate_payload_and_clear_operations
603
-
604
- # As soon as we unlock the locks, the rescuer will not be able to assume
605
- # that the database instance is still pristine, and so we need to stash the
606
- # payload in redis. If redis dies, we don't care because it can be
607
- # reconstructed. Subscribers can see "compressed" updates.
608
- publish_payload_in_redis
609
-
610
- # TODO Performance: merge these 3 redis operations to speed things up.
611
- unlock_write_dependencies
612
-
613
- # If we die from this point on, a recovery worker can republish our payload
614
- # since we queued it in Redis.
615
-
616
- # We don't care if we lost the lock and got recovered, subscribers are
617
- # immune to duplicate messages.
618
- publish_payload_in_rabbitmq_async
468
+ query.result
619
469
  end
620
470
 
621
- # --- the following methods can be overridden by the driver --- #
622
-
623
- def execute_persistent(&db_operation)
624
- return nil unless lock_instance_for_execute_persistent
625
- execute_persistent_locked(&db_operation)
471
+ def query_dependencies
472
+ # Returns the list of dependencies that are involved in the database query.
473
+ # For an atomic write operation, the first one returned must be the one
474
+ # corresponding to the primary key.
475
+ raise
626
476
  end
627
477
 
628
- def execute_non_persistent(&db_operation)
629
- # We are getting here in the following cases:
630
- # * read: we fetch the instance. It's the driver's job to cache the
631
- # raw instance and return it during db_operation.
632
- # * multi read: nothing to do, we'll keep our current selector, sadly
633
- # * write in a transaction: TODO
634
-
635
- if single?
636
- # If the query misses, we don't bother
637
- return nil unless reload_instance
638
- use_id_selector
639
- end
640
-
641
- # We don't do any reload_instance_dependencies at this point (and thus we
642
- # won't raise an exception on a multi read that we cannot track).
643
- # We'll wait until the commit, and hopefully with tainting, we'll be able to
644
- # tell if we should depend the multi read operation in question.
645
- perform_db_operation_with_no_exceptions(&db_operation)
646
- # If the db_operation raises, we don't consider this failed operation when
647
- # committing the next persistent write by omitting the operation in the
648
- # context.
649
- current_context.add_operation(self) unless failed?
650
- end
651
-
652
- def execute(&db_operation)
653
- # execute returns the result of the db_operation to perform
654
- db_operation ||= proc {}
655
- return db_operation.call if Promiscuous.disabled
656
-
657
- unless current_context
658
- raise Promiscuous::Error::MissingContext if write?
659
- return db_operation.call # Don't care for a read
660
- end
661
-
662
- self.persists? ? execute_persistent(&db_operation) :
663
- execute_non_persistent(&db_operation)
664
-
665
- @exception ? (raise @exception) : @result
478
+ def execute_instrumented(db_operation)
479
+ # Implemented by subclasses
480
+ raise
666
481
  end
667
482
 
668
- def fetch_instance
669
- # This method is overridden to use the original query selector.
670
- # Should return nil if the instance is not found.
671
- @instance
483
+ def operation_payloads
484
+ # subclass can use payloads_for to generate the payload
485
+ raise
672
486
  end
673
487
 
674
- def serialize_document_for_create_recovery
675
- # Overridden to be able to redo the create during recovery.
676
- nil
488
+ def recovery_payload
489
+ # Overridden to be able to recover the operation
490
+ []
677
491
  end
678
492
 
679
- def self.recover_operation(model, instance_id, document)
680
- # Overriden to reconstruct the operation. If the database is read, only the
681
- # primary must be used.
682
- new(:instance => model.new { |instance| instance.id = instance_id })
493
+ def self.recover_operation(*recovery_payload)
494
+ # Overridden to reconstruct the operation.
683
495
  end
684
496
 
685
497
  def recover_db_operation
686
- # Overriden to reexecute the db operation during recovery (or make sure that
498
+ # Overridden to reexecute the db operation during recovery (or make sure that
687
499
  # it will never succeed).
688
500
  end
689
501
 
690
- def use_id_selector(options={})
691
- # Overridden to use the {:id => @instance.id} selector.
692
- # if use_atomic_version_selector is passed, the driver must
693
- # add the VERSION_FIELD selector if present in original instance.
694
- end
695
-
696
- def use_versioned_selector
697
- # Overridden to use the {VERSION_FIELD => @instance[VERSION_FIELD]} selector.
698
- end
699
-
700
- def stash_version_in_write_query
701
- # Overridden to update the query to set 'instance.VERSION_FIELD = @instance_version'
502
+ def trace_operation
503
+ if ENV['TRACE']
504
+ msg = self.explain_operation(70)
505
+ current_context.trace(msg, :color => self.read? ? '0;32' : '1;31')
506
+ end
702
507
  end
703
508
 
704
- def going_to_execute_db_operation
705
- # Test hook
509
+ def explain_operation(max_width)
510
+ "Unknown database operation"
706
511
  end
707
512
  end