deimos-ruby 1.7.0.pre.beta1 → 1.8.1.pre.beta3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -4
  3. data/CHANGELOG.md +50 -0
  4. data/Gemfile.lock +109 -75
  5. data/README.md +147 -16
  6. data/deimos-ruby.gemspec +4 -2
  7. data/docs/ARCHITECTURE.md +144 -0
  8. data/docs/CONFIGURATION.md +4 -0
  9. data/lib/deimos.rb +8 -7
  10. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  11. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  12. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  13. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  14. data/lib/deimos/active_record_consumer.rb +33 -75
  15. data/lib/deimos/batch_consumer.rb +2 -142
  16. data/lib/deimos/config/configuration.rb +8 -10
  17. data/lib/deimos/consume/batch_consumption.rb +150 -0
  18. data/lib/deimos/consume/message_consumption.rb +94 -0
  19. data/lib/deimos/consumer.rb +79 -72
  20. data/lib/deimos/instrumentation.rb +10 -5
  21. data/lib/deimos/kafka_message.rb +1 -1
  22. data/lib/deimos/kafka_topic_info.rb +21 -2
  23. data/lib/deimos/message.rb +6 -1
  24. data/lib/deimos/schema_backends/avro_base.rb +33 -1
  25. data/lib/deimos/schema_backends/avro_schema_coercer.rb +30 -11
  26. data/lib/deimos/schema_backends/base.rb +21 -2
  27. data/lib/deimos/utils/db_poller.rb +6 -6
  28. data/lib/deimos/utils/db_producer.rb +57 -15
  29. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  30. data/lib/deimos/utils/lag_reporter.rb +19 -26
  31. data/lib/deimos/utils/schema_controller_mixin.rb +111 -0
  32. data/lib/deimos/version.rb +1 -1
  33. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  34. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  35. data/lib/generators/deimos/active_record_generator.rb +79 -0
  36. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  37. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  38. data/spec/active_record_batch_consumer_spec.rb +481 -0
  39. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  40. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  41. data/spec/active_record_consumer_spec.rb +3 -11
  42. data/spec/batch_consumer_spec.rb +24 -7
  43. data/spec/config/configuration_spec.rb +4 -0
  44. data/spec/consumer_spec.rb +6 -6
  45. data/spec/deimos_spec.rb +57 -49
  46. data/spec/generators/active_record_generator_spec.rb +56 -0
  47. data/spec/handlers/my_batch_consumer.rb +6 -1
  48. data/spec/handlers/my_consumer.rb +6 -1
  49. data/spec/kafka_listener_spec.rb +54 -0
  50. data/spec/kafka_topic_info_spec.rb +39 -16
  51. data/spec/message_spec.rb +19 -0
  52. data/spec/producer_spec.rb +34 -0
  53. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  54. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +55 -0
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  58. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  59. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  60. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  61. data/spec/spec_helper.rb +24 -0
  62. data/spec/utils/db_poller_spec.rb +2 -2
  63. data/spec/utils/db_producer_spec.rb +84 -10
  64. data/spec/utils/deadlock_retry_spec.rb +74 -0
  65. data/spec/utils/lag_reporter_spec.rb +29 -22
  66. data/spec/utils/schema_controller_mixin_spec.rb +68 -0
  67. metadata +87 -30
  68. data/lib/deimos/base_consumer.rb +0 -100
  69. data/lib/deimos/utils/executor.rb +0 -124
  70. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  71. data/lib/deimos/utils/signal_handler.rb +0 -68
  72. data/spec/utils/executor_spec.rb +0 -53
  73. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -3,13 +3,15 @@
3
3
  module Deimos
4
4
  # Represents a field in the schema.
5
5
  class SchemaField
6
- attr_accessor :name, :type
6
+ attr_accessor :name, :type, :enum_values
7
7
 
8
8
  # @param name [String]
9
9
  # @param type [Object]
10
- def initialize(name, type)
10
+ # @param enum_values [Array<String>]
11
+ def initialize(name, type, enum_values=[])
11
12
  @name = name
12
13
  @type = type
14
+ @enum_values = enum_values
13
15
  end
14
16
  end
15
17
 
@@ -69,6 +71,12 @@ module Deimos
69
71
  :mock
70
72
  end
71
73
 
74
+ # The content type to use when encoding / decoding requests over HTTP via ActionController.
75
+ # @return [String]
76
+ def self.content_type
77
+ raise NotImplementedError
78
+ end
79
+
72
80
  # Encode a payload. To be defined by subclass.
73
81
  # @param payload [Hash]
74
82
  # @param schema [Symbol|String]
@@ -109,6 +117,17 @@ module Deimos
109
117
  raise NotImplementedError
110
118
  end
111
119
 
120
+ # Given a field definition, return the SQL type that might be used in
121
+ # ActiveRecord table creation - e.g. for Avro, a `long` type would
122
+ # return `:bigint`. There are also special values that need to be returned:
123
+ # `:array`, `:map` and `:record`, for types representing those structures.
124
+ # `:enum` is also recognized.
125
+ # @param field [SchemaField]
126
+ # @return [Symbol]
127
+ def sql_type(field)
128
+ raise NotImplementedError
129
+ end
130
+
112
131
  # Encode a message key. To be defined by subclass.
113
132
  # @param key [String|Hash] the value to use as the key.
114
133
  # @param key_id [Symbol|String] the field name of the key.
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'deimos/poll_info'
4
- require 'deimos/utils/executor'
5
- require 'deimos/utils/signal_handler'
4
+ require 'sigurd/executor'
5
+ require 'sigurd/signal_handler'
6
6
 
7
7
  module Deimos
8
8
  module Utils
@@ -22,10 +22,10 @@ module Deimos
22
22
  pollers = Deimos.config.db_poller_objects.map do |poller_config|
23
23
  self.new(poller_config)
24
24
  end
25
- executor = Deimos::Utils::Executor.new(pollers,
26
- sleep_seconds: 5,
27
- logger: Deimos.config.logger)
28
- signal_handler = Deimos::Utils::SignalHandler.new(executor)
25
+ executor = Sigurd::Executor.new(pollers,
26
+ sleep_seconds: 5,
27
+ logger: Deimos.config.logger)
28
+ signal_handler = Sigurd::SignalHandler.new(executor)
29
29
  signal_handler.run!
30
30
  end
31
31
 
@@ -9,6 +9,8 @@ module Deimos
9
9
  attr_accessor :id, :current_topic
10
10
 
11
11
  BATCH_SIZE = 1000
12
+ DELETE_BATCH_SIZE = 10
13
+ MAX_DELETE_ATTEMPTS = 3
12
14
 
13
15
  # @param logger [Logger]
14
16
  def initialize(logger=Logger.new(STDOUT))
@@ -48,6 +50,7 @@ module Deimos
48
50
  topics = retrieve_topics
49
51
  @logger.info("Found topics: #{topics}")
50
52
  topics.each(&method(:process_topic))
53
+ KafkaTopicInfo.ping_empty_topics(topics)
51
54
  sleep(0.5)
52
55
  end
53
56
 
@@ -87,13 +90,13 @@ module Deimos
87
90
  begin
88
91
  produce_messages(compacted_messages.map(&:phobos_message))
89
92
  rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
90
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
93
+ delete_messages(messages)
91
94
  @logger.error('Message batch too large, deleting...')
92
95
  @logger.error(Deimos::KafkaMessage.decoded(messages))
93
96
  raise
94
97
  end
95
98
  end
96
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
99
+ delete_messages(messages)
97
100
  Deimos.config.metrics&.increment(
98
101
  'db_producer.process',
99
102
  tags: %W(topic:#{@current_topic}),
@@ -106,6 +109,27 @@ module Deimos
106
109
  true
107
110
  end
108
111
 
112
+ # @param messages [Array<Deimos::KafkaMessage>]
113
+ def delete_messages(messages)
114
+ attempts = 1
115
+ begin
116
+ messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
117
+ Deimos::KafkaMessage.where(topic: batch.first.topic,
118
+ id: batch.map(&:id)).
119
+ delete_all
120
+ end
121
+ rescue StandardError => e
122
+ if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
123
+ attempts <= MAX_DELETE_ATTEMPTS
124
+ attempts += 1
125
+ ActiveRecord::Base.connection.verify!
126
+ sleep(1)
127
+ retry
128
+ end
129
+ raise
130
+ end
131
+ end
132
+
109
133
  # @return [Array<Deimos::KafkaMessage>]
110
134
  def retrieve_messages
111
135
  KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
@@ -126,15 +150,33 @@ module Deimos
126
150
  metrics = Deimos.config.metrics
127
151
  return unless metrics
128
152
 
153
+ topics = KafkaTopicInfo.select(%w(topic last_processed_at))
129
154
  messages = Deimos::KafkaMessage.
130
155
  select('count(*) as num_messages, min(created_at) as earliest, topic').
131
- group(:topic)
132
- if messages.none?
133
- metrics.gauge('pending_db_messages_max_wait', 0)
134
- end
135
- messages.each do |record|
136
- time_diff = Time.zone.now - record.earliest
137
- metrics.gauge('pending_db_messages_max_wait', time_diff,
156
+ group(:topic).
157
+ index_by(&:topic)
158
+ topics.each do |record|
159
+ message_record = messages[record.topic]
160
+ # We want to record the last time we saw any activity, meaning either
161
+ # the oldest message, or the last time we processed, whichever comes
162
+ # last.
163
+ if message_record
164
+ record_earliest = record.earliest
165
+ # SQLite gives a string here
166
+ if record_earliest.is_a?(String)
167
+ record_earliest = Time.zone.parse(record_earliest)
168
+ end
169
+
170
+ earliest = [record.last_processed_at, record_earliest].max
171
+ time_diff = Time.zone.now - earliest
172
+ metrics.gauge('pending_db_messages_max_wait', time_diff,
173
+ tags: ["topic:#{record.topic}"])
174
+ else
175
+ # no messages waiting
176
+ metrics.gauge('pending_db_messages_max_wait', 0,
177
+ tags: ["topic:#{record.topic}"])
178
+ end
179
+ metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
138
180
  tags: ["topic:#{record.topic}"])
139
181
  end
140
182
  end
@@ -170,11 +212,11 @@ module Deimos
170
212
  end
171
213
 
172
214
  @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
173
- if batch_size < 10
174
- batch_size = 1
175
- else
176
- batch_size /= 10
177
- end
215
+ batch_size = if batch_size < 10
216
+ 1
217
+ else
218
+ (batch_size / 10).to_i
219
+ end
178
220
  shutdown_producer
179
221
  retry
180
222
  end
@@ -183,7 +225,7 @@ module Deimos
183
225
  # @param batch [Array<Deimos::KafkaMessage>]
184
226
  # @return [Array<Deimos::KafkaMessage>]
185
227
  def compact_messages(batch)
186
- return batch unless batch.first&.key.present?
228
+ return batch if batch.first&.key.blank?
187
229
 
188
230
  topic = batch.first.topic
189
231
  return batch if config.compact_topics != :all &&
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Utils
5
+ # Utility class to retry a given block if a deadlock is encountered.
6
+ # Supports Postgres and MySQL deadlocks and lock wait timeouts.
7
+ class DeadlockRetry
8
+ class << self
9
+ # Maximum number of times to retry the block after encountering a deadlock
10
+ RETRY_COUNT = 2
11
+
12
+ # Need to match on error messages to support older Rails versions
13
+ DEADLOCK_MESSAGES = [
14
+ # MySQL
15
+ 'Deadlock found when trying to get lock',
16
+ 'Lock wait timeout exceeded',
17
+
18
+ # Postgres
19
+ 'deadlock detected'
20
+ ].freeze
21
+
22
+ # Retry the given block when encountering a deadlock. For any other
23
+ # exceptions, they are reraised. This is used to handle cases where
24
+ # the database may be busy but the transaction would succeed if
25
+ # retried later. Note that your block should be idempotent and it will
26
+ # be wrapped in a transaction.
27
+ # Sleeps for a random number of seconds to prevent multiple transactions
28
+ # from retrying at the same time.
29
+ # @param tags [Array] Tags to attach when logging and reporting metrics.
30
+ # @yield Yields to the block that may deadlock.
31
+ def wrap(tags=[])
32
+ count = RETRY_COUNT
33
+
34
+ begin
35
+ ActiveRecord::Base.transaction do
36
+ yield
37
+ end
38
+ rescue ActiveRecord::StatementInvalid => e
39
+ # Reraise if not a known deadlock
40
+ raise if DEADLOCK_MESSAGES.none? { |m| e.message.include?(m) }
41
+
42
+ # Reraise if all retries exhausted
43
+ raise if count <= 0
44
+
45
+ Deimos.config.logger.warn(
46
+ message: 'Deadlock encountered when trying to execute query. '\
47
+ "Retrying. #{count} attempt(s) remaining",
48
+ tags: tags
49
+ )
50
+
51
+ Deimos.config.metrics&.increment(
52
+ 'deadlock',
53
+ tags: tags
54
+ )
55
+
56
+ count -= 1
57
+
58
+ # Sleep for a random amount so that if there are multiple
59
+ # transactions deadlocking, they don't all retry at the same time
60
+ sleep(Random.rand(5.0) + 0.5)
61
+
62
+ retry
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
@@ -29,30 +29,21 @@ module Deimos
29
29
  self.topics[topic.to_s].report_lag(partition)
30
30
  end
31
31
 
32
- # @param topic [String]
33
- # @param partition [Integer]
34
- # @param lag [Integer]
35
- def assign_lag(topic, partition, lag)
36
- self.topics[topic.to_s] ||= Topic.new(topic, self)
37
- self.topics[topic.to_s].assign_lag(partition, lag)
38
- end
39
-
40
- # Figure out the current lag by asking Kafka based on the current offset.
41
32
  # @param topic [String]
42
33
  # @param partition [Integer]
43
34
  # @param offset [Integer]
44
- def compute_lag(topic, partition, offset)
35
+ def assign_current_offset(topic, partition, offset)
45
36
  self.topics[topic.to_s] ||= Topic.new(topic, self)
46
- self.topics[topic.to_s].compute_lag(partition, offset)
37
+ self.topics[topic.to_s].assign_current_offset(partition, offset)
47
38
  end
48
39
  end
49
40
 
50
- # Topic which has a hash of partition => last known offset lag
41
+ # Topic which has a hash of partition => last known current offsets
51
42
  class Topic
52
43
  # @return [String]
53
44
  attr_accessor :topic_name
54
45
  # @return [Hash<Integer, Integer>]
55
- attr_accessor :partition_offset_lags
46
+ attr_accessor :partition_current_offsets
56
47
  # @return [ConsumerGroup]
57
48
  attr_accessor :consumer_group
58
49
 
@@ -61,35 +52,33 @@ module Deimos
61
52
  def initialize(topic_name, group)
62
53
  self.topic_name = topic_name
63
54
  self.consumer_group = group
64
- self.partition_offset_lags = {}
55
+ self.partition_current_offsets = {}
65
56
  end
66
57
 
67
58
  # @param partition [Integer]
68
- # @param lag [Integer]
69
- def assign_lag(partition, lag)
70
- self.partition_offset_lags[partition.to_i] = lag
59
+ def assign_current_offset(partition, offset)
60
+ self.partition_current_offsets[partition.to_i] = offset
71
61
  end
72
62
 
73
63
  # @param partition [Integer]
74
- # @param offset [Integer]
75
64
  def compute_lag(partition, offset)
76
- return if self.partition_offset_lags[partition.to_i]
77
-
78
65
  begin
79
66
  client = Phobos.create_kafka_client
80
67
  last_offset = client.last_offset_for(self.topic_name, partition)
81
- assign_lag(partition, [last_offset - offset, 0].max)
68
+ lag = last_offset - offset
82
69
  rescue StandardError # don't do anything, just wait
83
70
  Deimos.config.logger.
84
71
  debug("Error computing lag for #{self.topic_name}, will retry")
85
72
  end
73
+ lag || 0
86
74
  end
87
75
 
88
76
  # @param partition [Integer]
89
77
  def report_lag(partition)
90
- lag = self.partition_offset_lags[partition.to_i]
91
- return unless lag
78
+ current_offset = self.partition_current_offsets[partition.to_i]
79
+ return unless current_offset
92
80
 
81
+ lag = compute_lag(partition, current_offset)
93
82
  group = self.consumer_group.id
94
83
  Deimos.config.logger.
95
84
  debug("Sending lag: #{group}/#{partition}: #{lag}")
@@ -109,16 +98,20 @@ module Deimos
109
98
  @groups = {}
110
99
  end
111
100
 
101
+ # offset_lag = event.payload.fetch(:offset_lag)
102
+ # group_id = event.payload.fetch(:group_id)
103
+ # topic = event.payload.fetch(:topic)
104
+ # partition = event.payload.fetch(:partition)
112
105
  # @param payload [Hash]
113
106
  def message_processed(payload)
114
- lag = payload[:offset_lag]
107
+ offset = payload[:offset] || payload[:last_offset]
115
108
  topic = payload[:topic]
116
109
  group = payload[:group_id]
117
110
  partition = payload[:partition]
118
111
 
119
112
  synchronize do
120
113
  @groups[group.to_s] ||= ConsumerGroup.new(group)
121
- @groups[group.to_s].assign_lag(topic, partition, lag)
114
+ @groups[group.to_s].assign_current_offset(topic, partition, offset)
122
115
  end
123
116
  end
124
117
 
@@ -131,7 +124,7 @@ module Deimos
131
124
 
132
125
  synchronize do
133
126
  @groups[group.to_s] ||= ConsumerGroup.new(group)
134
- @groups[group.to_s].compute_lag(topic, partition, offset)
127
+ @groups[group.to_s].assign_current_offset(topic, partition, offset)
135
128
  end
136
129
  end
137
130
 
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Utils
5
+ # Mixin to automatically decode schema-encoded payloads when given the correct content type,
6
+ # and provide the `render_schema` method to encode the payload for responses.
7
+ module SchemaControllerMixin
8
+ extend ActiveSupport::Concern
9
+
10
+ included do
11
+ Mime::Type.register('avro/binary', :avro)
12
+
13
+ attr_accessor :payload
14
+
15
+ if respond_to?(:before_filter)
16
+ before_filter(:decode_schema, if: :schema_format?)
17
+ else
18
+ before_action(:decode_schema, if: :schema_format?)
19
+ end
20
+ end
21
+
22
+ # :nodoc:
23
+ module ClassMethods
24
+ # @return [Hash<String, Hash<Symbol, String>>]
25
+ def schema_mapping
26
+ @schema_mapping ||= {}
27
+ end
28
+
29
+ # Indicate which schemas should be assigned to actions.
30
+ # @param actions [Symbol]
31
+ # @param request [String]
32
+ # @param response [String]
33
+ def schemas(*actions, request: nil, response: nil)
34
+ actions.each do |action|
35
+ request ||= action.to_s.titleize
36
+ response ||= action.to_s.titleize
37
+ schema_mapping[action.to_s] = { request: request, response: response }
38
+ end
39
+ end
40
+
41
+ # @return [Hash<Symbol, String>]
42
+ def namespaces
43
+ @namespaces ||= {}
44
+ end
45
+
46
+ # Set the namespace for both requests and responses.
47
+ # @param name [String]
48
+ def namespace(name)
49
+ request_namespace(name)
50
+ response_namespace(name)
51
+ end
52
+
53
+ # Set the namespace for requests.
54
+ # @param name [String]
55
+ def request_namespace(name)
56
+ namespaces[:request] = name
57
+ end
58
+
59
+ # Set the namespace for responses.
60
+ # @param name [String]
61
+ def response_namespace(name)
62
+ namespaces[:response] = name
63
+ end
64
+ end
65
+
66
+ # @return [Boolean]
67
+ def schema_format?
68
+ request.content_type == Deimos.schema_backend_class.content_type
69
+ end
70
+
71
+ # Get the namespace from either an existing instance variable, or tease it out of the schema.
72
+ # @param type [Symbol] :request or :response
73
+ # @return [Array<String, String>] the namespace and schema.
74
+ def parse_namespace(type)
75
+ namespace = self.class.namespaces[type]
76
+ schema = self.class.schema_mapping[params['action']][type]
77
+ if schema.nil?
78
+ raise "No #{type} schema defined for #{params[:controller]}##{params[:action]}!"
79
+ end
80
+
81
+ if namespace.nil?
82
+ last_period = schema.rindex('.')
83
+ namespace, schema = schema.split(last_period)
84
+ end
85
+ if namespace.nil? || schema.nil?
86
+ raise "No request namespace defined for #{params[:controller]}##{params[:action]}!"
87
+ end
88
+
89
+ [namespace, schema]
90
+ end
91
+
92
+ # Decode the payload with the parameters.
93
+ def decode_schema
94
+ namespace, schema = parse_namespace(:request)
95
+ decoder = Deimos.schema_backend(schema: schema, namespace: namespace)
96
+ @payload = decoder.decode(request.body.read).with_indifferent_access
97
+ request.body.rewind if request.body.respond_to?(:rewind)
98
+ end
99
+
100
+ # Render a hash into a payload as specified by the configured schema and namespace.
101
+ # @param payload [Hash]
102
+ def render_schema(payload, schema: nil, namespace: nil)
103
+ namespace, schema = parse_namespace(:response) if !schema && !namespace
104
+ encoder = Deimos.schema_backend(schema: schema, namespace: namespace)
105
+ encoded = encoder.encode(payload)
106
+ response.headers['Content-Type'] = encoder.class.content_type
107
+ send_data(encoded)
108
+ end
109
+ end
110
+ end
111
+ end