deimos-ruby 1.8.0.pre.beta1 → 1.8.1.pre.beta4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -4
  3. data/CHANGELOG.md +42 -0
  4. data/Gemfile.lock +101 -73
  5. data/README.md +78 -1
  6. data/deimos-ruby.gemspec +2 -2
  7. data/lib/deimos.rb +4 -3
  8. data/lib/deimos/consume/batch_consumption.rb +2 -0
  9. data/lib/deimos/consume/message_consumption.rb +1 -0
  10. data/lib/deimos/instrumentation.rb +10 -5
  11. data/lib/deimos/kafka_topic_info.rb +21 -2
  12. data/lib/deimos/schema_backends/avro_base.rb +33 -1
  13. data/lib/deimos/schema_backends/avro_schema_coercer.rb +30 -9
  14. data/lib/deimos/schema_backends/base.rb +21 -2
  15. data/lib/deimos/utils/db_producer.rb +57 -19
  16. data/lib/deimos/utils/schema_controller_mixin.rb +111 -0
  17. data/lib/deimos/version.rb +1 -1
  18. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  19. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  20. data/lib/generators/deimos/active_record_generator.rb +79 -0
  21. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  22. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  23. data/spec/batch_consumer_spec.rb +1 -0
  24. data/spec/generators/active_record_generator_spec.rb +56 -0
  25. data/spec/kafka_listener_spec.rb +54 -0
  26. data/spec/kafka_topic_info_spec.rb +39 -16
  27. data/spec/producer_spec.rb +36 -0
  28. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  29. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
  30. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  31. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  32. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  33. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  34. data/spec/spec_helper.rb +7 -0
  35. data/spec/utils/db_producer_spec.rb +84 -10
  36. data/spec/utils/schema_controller_mixin_spec.rb +68 -0
  37. metadata +40 -24
@@ -23,7 +23,6 @@ Gem::Specification.new do |spec|
23
23
  spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
24
24
  spec.add_runtime_dependency('sigurd', '0.0.1')
25
25
 
26
- spec.add_development_dependency('activerecord', '~> 5.2')
27
26
  spec.add_development_dependency('activerecord-import')
28
27
  spec.add_development_dependency('avro', '~> 1.9')
29
28
  spec.add_development_dependency('database_cleaner', '~> 1.7')
@@ -34,10 +33,11 @@ Gem::Specification.new do |spec|
34
33
  spec.add_development_dependency('guard-rubocop', '~> 1')
35
34
  spec.add_development_dependency('mysql2', '~> 0.5')
36
35
  spec.add_development_dependency('pg', '~> 1.1')
37
- spec.add_development_dependency('rails', '~> 5.2', '>= 5.2.4.2')
36
+ spec.add_development_dependency('rails', '~> 6')
38
37
  spec.add_development_dependency('rake', '~> 13')
39
38
  spec.add_development_dependency('rspec', '~> 3')
40
39
  spec.add_development_dependency('rspec_junit_formatter', '~>0.3')
40
+ spec.add_development_dependency('rspec-rails', '~> 4')
41
41
  spec.add_development_dependency('rubocop', '~> 0.72')
42
42
  spec.add_development_dependency('rubocop-rspec', '~> 1.27')
43
43
  spec.add_development_dependency('sqlite3', '~> 1.3')
@@ -23,14 +23,15 @@ require 'deimos/monkey_patches/phobos_producer'
23
23
  require 'deimos/monkey_patches/phobos_cli'
24
24
 
25
25
  require 'deimos/railtie' if defined?(Rails)
26
+ require 'deimos/utils/schema_controller_mixin' if defined?(ActionController)
26
27
 
27
28
  if defined?(ActiveRecord)
28
29
  require 'deimos/kafka_source'
29
30
  require 'deimos/kafka_topic_info'
30
31
  require 'deimos/backends/db'
31
- require 'sigurd/signal_handler.rb'
32
- require 'sigurd/executor.rb'
33
- require 'deimos/utils/db_producer.rb'
32
+ require 'sigurd/signal_handler'
33
+ require 'sigurd/executor'
34
+ require 'deimos/utils/db_producer'
34
35
  require 'deimos/utils/db_poller'
35
36
  end
36
37
 
@@ -6,6 +6,7 @@ module Deimos
6
6
  # delivery. Payloads are decoded then consumers are invoked with arrays
7
7
  # of messages to be handled at once
8
8
  module BatchConsumption
9
+ extend ActiveSupport::Concern
9
10
  include Phobos::BatchHandler
10
11
 
11
12
  # :nodoc:
@@ -17,6 +18,7 @@ module Deimos
17
18
  decode_key(message.key)
18
19
  end
19
20
  end
21
+ metadata[:first_offset] = batch.first&.offset
20
22
 
21
23
  payloads = batch.map do |message|
22
24
  message.payload ? self.class.decoder.decode(message.payload) : nil
@@ -5,6 +5,7 @@ module Deimos
5
5
  # Methods used by message-by-message (non-batch) consumers. These consumers
6
6
  # are invoked for every individual message.
7
7
  module MessageConsumption
8
+ extend ActiveSupport::Concern
8
9
  include Phobos::Handler
9
10
 
10
11
  # :nodoc:
@@ -46,13 +46,18 @@ module Deimos
46
46
 
47
47
  messages = exception.failed_messages
48
48
  messages.group_by(&:topic).each do |topic, batch|
49
- next if batch.empty?
49
+ producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
50
+ next if batch.empty? || !producer
50
51
 
51
- producer = batch.first.metadata[:producer_name]
52
- payloads = batch.map { |m| m.metadata[:decoded_payload] }
52
+ decoder = Deimos.schema_backend(schema: producer.config[:schema],
53
+ namespace: producer.config[:namespace])
54
+ payloads = batch.map { |m| decoder.decode(m.value) }
53
55
 
54
- Deimos.config.metrics&.count('publish_error', payloads.size,
55
- tags: %W(topic:#{topic}))
56
+ Deimos.config.metrics&.increment(
57
+ 'publish_error',
58
+ tags: %W(topic:#{topic}),
59
+ by: payloads.size
60
+ )
56
61
  Deimos.instrument(
57
62
  'produce_error',
58
63
  producer: producer,
@@ -13,7 +13,7 @@ module Deimos
13
13
  def lock(topic, lock_id)
14
14
  # Try to create it - it's fine if it already exists
15
15
  begin
16
- self.create(topic: topic)
16
+ self.create(topic: topic, last_processed_at: Time.zone.now)
17
17
  rescue ActiveRecord::RecordNotUnique
18
18
  # continue on
19
19
  end
@@ -52,7 +52,26 @@ module Deimos
52
52
  # @param lock_id [String]
53
53
  def clear_lock(topic, lock_id)
54
54
  self.where(topic: topic, locked_by: lock_id).
55
- update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
55
+ update_all(locked_by: nil,
56
+ locked_at: nil,
57
+ error: false,
58
+ retries: 0,
59
+ last_processed_at: Time.zone.now)
60
+ end
61
+
62
+ # Update all topics that aren't currently locked and have no messages
63
+ # waiting. It's OK if some messages get inserted in the middle of this
64
+ # because the point is that at least within a few milliseconds of each
65
+ # other, it wasn't locked and had no messages, meaning the topic
66
+ # was in a good state.
67
+ # @param except_topics [Array<String>] the list of topics we've just
68
+ # realized had messages in them, meaning all other topics were empty.
69
+ def ping_empty_topics(except_topics)
70
+ records = KafkaTopicInfo.where(locked_by: nil).
71
+ where('topic not in(?)', except_topics)
72
+ records.each do |info|
73
+ info.update_attribute(:last_processed_at, Time.zone.now)
74
+ end
56
75
  end
57
76
 
58
77
  # The producer calls this if it gets an error sending messages. This
@@ -33,6 +33,30 @@ module Deimos
33
33
  decode(payload, schema: @key_schema['name'])[field_name]
34
34
  end
35
35
 
36
+ # :nodoc:
37
+ def sql_type(field)
38
+ type = field.type.type
39
+ return type if %w(array map record).include?(type)
40
+
41
+ if type == :union
42
+ non_null = field.type.schemas.reject { |f| f.type == :null }
43
+ if non_null.size > 1
44
+ warn("WARNING: #{field.name} has more than one non-null type. Picking the first for the SQL type.")
45
+ end
46
+ return non_null.first.type
47
+ end
48
+ return type.to_sym if %w(float boolean).include?(type)
49
+ return :integer if type == 'int'
50
+ return :bigint if type == 'long'
51
+
52
+ if type == 'double'
53
+ warn('Avro `double` type turns into SQL `float` type. Please ensure you have the correct `limit` set.')
54
+ return :float
55
+ end
56
+
57
+ :string
58
+ end
59
+
36
60
  # @override
37
61
  def coerce_field(field, value)
38
62
  AvroSchemaCoercer.new(avro_schema).coerce_type(field.type, value)
@@ -40,7 +64,10 @@ module Deimos
40
64
 
41
65
  # @override
42
66
  def schema_fields
43
- avro_schema.fields.map { |field| SchemaField.new(field.name, field.type) }
67
+ avro_schema.fields.map do |field|
68
+ enum_values = field.type.type == 'enum' ? field.type.symbols : []
69
+ SchemaField.new(field.name, field.type, enum_values)
70
+ end
44
71
  end
45
72
 
46
73
  # @override
@@ -55,6 +82,11 @@ module Deimos
55
82
  :avro_validation
56
83
  end
57
84
 
85
+ # @override
86
+ def self.content_type
87
+ 'avro/binary'
88
+ end
89
+
58
90
  private
59
91
 
60
92
  # @param schema [String]
@@ -10,18 +10,37 @@ module Deimos
10
10
  @schema = schema
11
11
  end
12
12
 
13
- # @param type [Symbol]
13
+ # Coerce sub-records in a payload to match the schema.
14
+ # @param type [Avro::Schema::UnionSchema]
15
+ # @param val [Object]
16
+ # @return [Object]
17
+ def coerce_union(type, val)
18
+ union_types = type.schemas.map { |s| s.type.to_sym }
19
+ return nil if val.nil? && union_types.include?(:null)
20
+
21
+ schema_type = type.schemas.find { |s| s.type.to_sym != :null }
22
+ coerce_type(schema_type, val)
23
+ end
24
+
25
+ # Coerce sub-records in a payload to match the schema.
26
+ # @param type [Avro::Schema::RecordSchema]
27
+ # @param val [Object]
28
+ # @return [Object]
29
+ def coerce_record(type, val)
30
+ record = val.map do |name, value|
31
+ field = type.fields.find { |f| f.name == name }
32
+ coerce_type(field.type, value)
33
+ end
34
+ val.keys.zip(record).to_h
35
+ end
36
+
37
+ # Coerce values in a payload to match the schema.
38
+ # @param type [Avro::Schema]
14
39
  # @param val [Object]
15
40
  # @return [Object]
16
41
  def coerce_type(type, val)
17
42
  int_classes = [Time, ActiveSupport::TimeWithZone]
18
43
  field_type = type.type.to_sym
19
- if field_type == :union
20
- union_types = type.schemas.map { |s| s.type.to_sym }
21
- return nil if val.nil? && union_types.include?(:null)
22
-
23
- field_type = union_types.find { |t| t != :null }
24
- end
25
44
 
26
45
  case field_type
27
46
  when :int, :long
@@ -32,14 +51,12 @@ module Deimos
32
51
  else
33
52
  val # this will fail
34
53
  end
35
-
36
54
  when :float, :double
37
55
  if val.is_a?(Numeric) || _is_float_string?(val)
38
56
  val.to_f
39
57
  else
40
58
  val # this will fail
41
59
  end
42
-
43
60
  when :string
44
61
  if val.respond_to?(:to_str)
45
62
  val.to_s
@@ -54,6 +71,10 @@ module Deimos
54
71
  else
55
72
  true
56
73
  end
74
+ when :union
75
+ coerce_union(type, val)
76
+ when :record
77
+ coerce_record(type, val)
57
78
  else
58
79
  val
59
80
  end
@@ -3,13 +3,15 @@
3
3
  module Deimos
4
4
  # Represents a field in the schema.
5
5
  class SchemaField
6
- attr_accessor :name, :type
6
+ attr_accessor :name, :type, :enum_values
7
7
 
8
8
  # @param name [String]
9
9
  # @param type [Object]
10
- def initialize(name, type)
10
+ # @param enum_values [Array<String>]
11
+ def initialize(name, type, enum_values=[])
11
12
  @name = name
12
13
  @type = type
14
+ @enum_values = enum_values
13
15
  end
14
16
  end
15
17
 
@@ -69,6 +71,12 @@ module Deimos
69
71
  :mock
70
72
  end
71
73
 
74
+ # The content type to use when encoding / decoding requests over HTTP via ActionController.
75
+ # @return [String]
76
+ def self.content_type
77
+ raise NotImplementedError
78
+ end
79
+
72
80
  # Encode a payload. To be defined by subclass.
73
81
  # @param payload [Hash]
74
82
  # @param schema [Symbol|String]
@@ -109,6 +117,17 @@ module Deimos
109
117
  raise NotImplementedError
110
118
  end
111
119
 
120
+ # Given a field definition, return the SQL type that might be used in
121
+ # ActiveRecord table creation - e.g. for Avro, a `long` type would
122
+ # return `:bigint`. There are also special values that need to be returned:
123
+ # `:array`, `:map` and `:record`, for types representing those structures.
124
+ # `:enum` is also recognized.
125
+ # @param field [SchemaField]
126
+ # @return [Symbol]
127
+ def sql_type(field)
128
+ raise NotImplementedError
129
+ end
130
+
112
131
  # Encode a message key. To be defined by subclass.
113
132
  # @param key [String|Hash] the value to use as the key.
114
133
  # @param key_id [Symbol|String] the field name of the key.
@@ -9,6 +9,8 @@ module Deimos
9
9
  attr_accessor :id, :current_topic
10
10
 
11
11
  BATCH_SIZE = 1000
12
+ DELETE_BATCH_SIZE = 10
13
+ MAX_DELETE_ATTEMPTS = 3
12
14
 
13
15
  # @param logger [Logger]
14
16
  def initialize(logger=Logger.new(STDOUT))
@@ -48,6 +50,7 @@ module Deimos
48
50
  topics = retrieve_topics
49
51
  @logger.info("Found topics: #{topics}")
50
52
  topics.each(&method(:process_topic))
53
+ KafkaTopicInfo.ping_empty_topics(topics)
51
54
  sleep(0.5)
52
55
  end
53
56
 
@@ -87,13 +90,13 @@ module Deimos
87
90
  begin
88
91
  produce_messages(compacted_messages.map(&:phobos_message))
89
92
  rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
93
+ delete_messages(messages)
90
94
  @logger.error('Message batch too large, deleting...')
91
95
  @logger.error(Deimos::KafkaMessage.decoded(messages))
92
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
93
96
  raise
94
97
  end
95
98
  end
96
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
99
+ delete_messages(messages)
97
100
  Deimos.config.metrics&.increment(
98
101
  'db_producer.process',
99
102
  tags: %W(topic:#{@current_topic}),
@@ -106,6 +109,27 @@ module Deimos
106
109
  true
107
110
  end
108
111
 
112
+ # @param messages [Array<Deimos::KafkaMessage>]
113
+ def delete_messages(messages)
114
+ attempts = 1
115
+ begin
116
+ messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
117
+ Deimos::KafkaMessage.where(topic: batch.first.topic,
118
+ id: batch.map(&:id)).
119
+ delete_all
120
+ end
121
+ rescue StandardError => e
122
+ if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
123
+ attempts <= MAX_DELETE_ATTEMPTS
124
+ attempts += 1
125
+ ActiveRecord::Base.connection.verify!
126
+ sleep(1)
127
+ retry
128
+ end
129
+ raise
130
+ end
131
+ end
132
+
109
133
  # @return [Array<Deimos::KafkaMessage>]
110
134
  def retrieve_messages
111
135
  KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
@@ -126,19 +150,33 @@ module Deimos
126
150
  metrics = Deimos.config.metrics
127
151
  return unless metrics
128
152
 
153
+ topics = KafkaTopicInfo.select(%w(topic last_processed_at))
129
154
  messages = Deimos::KafkaMessage.
130
155
  select('count(*) as num_messages, min(created_at) as earliest, topic').
131
- group(:topic)
132
- if messages.none?
133
- metrics.gauge('pending_db_messages_max_wait', 0)
134
- end
135
- messages.each do |record|
136
- earliest = record.earliest
137
- # SQLite gives a string here
138
- earliest = Time.zone.parse(earliest) if earliest.is_a?(String)
139
-
140
- time_diff = Time.zone.now - earliest
141
- metrics.gauge('pending_db_messages_max_wait', time_diff,
156
+ group(:topic).
157
+ index_by(&:topic)
158
+ topics.each do |record|
159
+ message_record = messages[record.topic]
160
+ # We want to record the last time we saw any activity, meaning either
161
+ # the oldest message, or the last time we processed, whichever comes
162
+ # last.
163
+ if message_record
164
+ record_earliest = record.earliest
165
+ # SQLite gives a string here
166
+ if record_earliest.is_a?(String)
167
+ record_earliest = Time.zone.parse(record_earliest)
168
+ end
169
+
170
+ earliest = [record.last_processed_at, record_earliest].max
171
+ time_diff = Time.zone.now - earliest
172
+ metrics.gauge('pending_db_messages_max_wait', time_diff,
173
+ tags: ["topic:#{record.topic}"])
174
+ else
175
+ # no messages waiting
176
+ metrics.gauge('pending_db_messages_max_wait', 0,
177
+ tags: ["topic:#{record.topic}"])
178
+ end
179
+ metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
142
180
  tags: ["topic:#{record.topic}"])
143
181
  end
144
182
  end
@@ -174,11 +212,11 @@ module Deimos
174
212
  end
175
213
 
176
214
  @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
177
- if batch_size < 10
178
- batch_size = 1
179
- else
180
- batch_size /= 10
181
- end
215
+ batch_size = if batch_size < 10
216
+ 1
217
+ else
218
+ (batch_size / 10).to_i
219
+ end
182
220
  shutdown_producer
183
221
  retry
184
222
  end
@@ -187,7 +225,7 @@ module Deimos
187
225
  # @param batch [Array<Deimos::KafkaMessage>]
188
226
  # @return [Array<Deimos::KafkaMessage>]
189
227
  def compact_messages(batch)
190
- return batch unless batch.first&.key.present?
228
+ return batch if batch.first&.key.blank?
191
229
 
192
230
  topic = batch.first.topic
193
231
  return batch if config.compact_topics != :all &&
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Utils
5
+ # Mixin to automatically decode schema-encoded payloads when given the correct content type,
6
+ # and provide the `render_schema` method to encode the payload for responses.
7
+ module SchemaControllerMixin
8
+ extend ActiveSupport::Concern
9
+
10
+ included do
11
+ Mime::Type.register('avro/binary', :avro)
12
+
13
+ attr_accessor :payload
14
+
15
+ if respond_to?(:before_filter)
16
+ before_filter(:decode_schema, if: :schema_format?)
17
+ else
18
+ before_action(:decode_schema, if: :schema_format?)
19
+ end
20
+ end
21
+
22
+ # :nodoc:
23
+ module ClassMethods
24
+ # @return [Hash<String, Hash<Symbol, String>>]
25
+ def schema_mapping
26
+ @schema_mapping ||= {}
27
+ end
28
+
29
+ # Indicate which schemas should be assigned to actions.
30
+ # @param actions [Symbol]
31
+ # @param request [String]
32
+ # @param response [String]
33
+ def schemas(*actions, request: nil, response: nil)
34
+ actions.each do |action|
35
+ request ||= action.to_s.titleize
36
+ response ||= action.to_s.titleize
37
+ schema_mapping[action.to_s] = { request: request, response: response }
38
+ end
39
+ end
40
+
41
+ # @return [Hash<Symbol, String>]
42
+ def namespaces
43
+ @namespaces ||= {}
44
+ end
45
+
46
+ # Set the namespace for both requests and responses.
47
+ # @param name [String]
48
+ def namespace(name)
49
+ request_namespace(name)
50
+ response_namespace(name)
51
+ end
52
+
53
+ # Set the namespace for requests.
54
+ # @param name [String]
55
+ def request_namespace(name)
56
+ namespaces[:request] = name
57
+ end
58
+
59
+ # Set the namespace for responses.
60
+ # @param name [String]
61
+ def response_namespace(name)
62
+ namespaces[:response] = name
63
+ end
64
+ end
65
+
66
+ # @return [Boolean]
67
+ def schema_format?
68
+ request.content_type == Deimos.schema_backend_class.content_type
69
+ end
70
+
71
+ # Get the namespace from either an existing instance variable, or tease it out of the schema.
72
+ # @param type [Symbol] :request or :response
73
+ # @return [Array<String, String>] the namespace and schema.
74
+ def parse_namespace(type)
75
+ namespace = self.class.namespaces[type]
76
+ schema = self.class.schema_mapping[params['action']][type]
77
+ if schema.nil?
78
+ raise "No #{type} schema defined for #{params[:controller]}##{params[:action]}!"
79
+ end
80
+
81
+ if namespace.nil?
82
+ last_period = schema.rindex('.')
83
+ namespace, schema = schema.split(last_period)
84
+ end
85
+ if namespace.nil? || schema.nil?
86
+ raise "No request namespace defined for #{params[:controller]}##{params[:action]}!"
87
+ end
88
+
89
+ [namespace, schema]
90
+ end
91
+
92
+ # Decode the payload with the parameters.
93
+ def decode_schema
94
+ namespace, schema = parse_namespace(:request)
95
+ decoder = Deimos.schema_backend(schema: schema, namespace: namespace)
96
+ @payload = decoder.decode(request.body.read).with_indifferent_access
97
+ request.body.rewind if request.body.respond_to?(:rewind)
98
+ end
99
+
100
+ # Render a hash into a payload as specified by the configured schema and namespace.
101
+ # @param payload [Hash]
102
+ def render_schema(payload, schema: nil, namespace: nil)
103
+ namespace, schema = parse_namespace(:response) if !schema && !namespace
104
+ encoder = Deimos.schema_backend(schema: schema, namespace: namespace)
105
+ encoded = encoder.encode(payload)
106
+ response.headers['Content-Type'] = encoder.class.content_type
107
+ send_data(encoded)
108
+ end
109
+ end
110
+ end
111
+ end