deimos-ruby 1.8.0.pre.beta1 → 1.8.1.pre.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -4
  3. data/CHANGELOG.md +42 -0
  4. data/Gemfile.lock +101 -73
  5. data/README.md +78 -1
  6. data/deimos-ruby.gemspec +2 -2
  7. data/lib/deimos.rb +4 -3
  8. data/lib/deimos/consume/batch_consumption.rb +2 -0
  9. data/lib/deimos/consume/message_consumption.rb +1 -0
  10. data/lib/deimos/instrumentation.rb +10 -5
  11. data/lib/deimos/kafka_topic_info.rb +21 -2
  12. data/lib/deimos/schema_backends/avro_base.rb +33 -1
  13. data/lib/deimos/schema_backends/avro_schema_coercer.rb +30 -9
  14. data/lib/deimos/schema_backends/base.rb +21 -2
  15. data/lib/deimos/utils/db_producer.rb +57 -19
  16. data/lib/deimos/utils/schema_controller_mixin.rb +111 -0
  17. data/lib/deimos/version.rb +1 -1
  18. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  19. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  20. data/lib/generators/deimos/active_record_generator.rb +79 -0
  21. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  22. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  23. data/spec/batch_consumer_spec.rb +1 -0
  24. data/spec/generators/active_record_generator_spec.rb +56 -0
  25. data/spec/kafka_listener_spec.rb +54 -0
  26. data/spec/kafka_topic_info_spec.rb +39 -16
  27. data/spec/producer_spec.rb +36 -0
  28. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  29. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
  30. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  31. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  32. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  33. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  34. data/spec/spec_helper.rb +7 -0
  35. data/spec/utils/db_producer_spec.rb +84 -10
  36. data/spec/utils/schema_controller_mixin_spec.rb +68 -0
  37. metadata +40 -24
@@ -23,7 +23,6 @@ Gem::Specification.new do |spec|
23
23
  spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
24
24
  spec.add_runtime_dependency('sigurd', '0.0.1')
25
25
 
26
- spec.add_development_dependency('activerecord', '~> 5.2')
27
26
  spec.add_development_dependency('activerecord-import')
28
27
  spec.add_development_dependency('avro', '~> 1.9')
29
28
  spec.add_development_dependency('database_cleaner', '~> 1.7')
@@ -34,10 +33,11 @@ Gem::Specification.new do |spec|
34
33
  spec.add_development_dependency('guard-rubocop', '~> 1')
35
34
  spec.add_development_dependency('mysql2', '~> 0.5')
36
35
  spec.add_development_dependency('pg', '~> 1.1')
37
- spec.add_development_dependency('rails', '~> 5.2', '>= 5.2.4.2')
36
+ spec.add_development_dependency('rails', '~> 6')
38
37
  spec.add_development_dependency('rake', '~> 13')
39
38
  spec.add_development_dependency('rspec', '~> 3')
40
39
  spec.add_development_dependency('rspec_junit_formatter', '~>0.3')
40
+ spec.add_development_dependency('rspec-rails', '~> 4')
41
41
  spec.add_development_dependency('rubocop', '~> 0.72')
42
42
  spec.add_development_dependency('rubocop-rspec', '~> 1.27')
43
43
  spec.add_development_dependency('sqlite3', '~> 1.3')
@@ -23,14 +23,15 @@ require 'deimos/monkey_patches/phobos_producer'
23
23
  require 'deimos/monkey_patches/phobos_cli'
24
24
 
25
25
  require 'deimos/railtie' if defined?(Rails)
26
+ require 'deimos/utils/schema_controller_mixin' if defined?(ActionController)
26
27
 
27
28
  if defined?(ActiveRecord)
28
29
  require 'deimos/kafka_source'
29
30
  require 'deimos/kafka_topic_info'
30
31
  require 'deimos/backends/db'
31
- require 'sigurd/signal_handler.rb'
32
- require 'sigurd/executor.rb'
33
- require 'deimos/utils/db_producer.rb'
32
+ require 'sigurd/signal_handler'
33
+ require 'sigurd/executor'
34
+ require 'deimos/utils/db_producer'
34
35
  require 'deimos/utils/db_poller'
35
36
  end
36
37
 
@@ -6,6 +6,7 @@ module Deimos
6
6
  # delivery. Payloads are decoded then consumers are invoked with arrays
7
7
  # of messages to be handled at once
8
8
  module BatchConsumption
9
+ extend ActiveSupport::Concern
9
10
  include Phobos::BatchHandler
10
11
 
11
12
  # :nodoc:
@@ -17,6 +18,7 @@ module Deimos
17
18
  decode_key(message.key)
18
19
  end
19
20
  end
21
+ metadata[:first_offset] = batch.first&.offset
20
22
 
21
23
  payloads = batch.map do |message|
22
24
  message.payload ? self.class.decoder.decode(message.payload) : nil
@@ -5,6 +5,7 @@ module Deimos
5
5
  # Methods used by message-by-message (non-batch) consumers. These consumers
6
6
  # are invoked for every individual message.
7
7
  module MessageConsumption
8
+ extend ActiveSupport::Concern
8
9
  include Phobos::Handler
9
10
 
10
11
  # :nodoc:
@@ -46,13 +46,18 @@ module Deimos
46
46
 
47
47
  messages = exception.failed_messages
48
48
  messages.group_by(&:topic).each do |topic, batch|
49
- next if batch.empty?
49
+ producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
50
+ next if batch.empty? || !producer
50
51
 
51
- producer = batch.first.metadata[:producer_name]
52
- payloads = batch.map { |m| m.metadata[:decoded_payload] }
52
+ decoder = Deimos.schema_backend(schema: producer.config[:schema],
53
+ namespace: producer.config[:namespace])
54
+ payloads = batch.map { |m| decoder.decode(m.value) }
53
55
 
54
- Deimos.config.metrics&.count('publish_error', payloads.size,
55
- tags: %W(topic:#{topic}))
56
+ Deimos.config.metrics&.increment(
57
+ 'publish_error',
58
+ tags: %W(topic:#{topic}),
59
+ by: payloads.size
60
+ )
56
61
  Deimos.instrument(
57
62
  'produce_error',
58
63
  producer: producer,
@@ -13,7 +13,7 @@ module Deimos
13
13
  def lock(topic, lock_id)
14
14
  # Try to create it - it's fine if it already exists
15
15
  begin
16
- self.create(topic: topic)
16
+ self.create(topic: topic, last_processed_at: Time.zone.now)
17
17
  rescue ActiveRecord::RecordNotUnique
18
18
  # continue on
19
19
  end
@@ -52,7 +52,26 @@ module Deimos
52
52
  # @param lock_id [String]
53
53
  def clear_lock(topic, lock_id)
54
54
  self.where(topic: topic, locked_by: lock_id).
55
- update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
55
+ update_all(locked_by: nil,
56
+ locked_at: nil,
57
+ error: false,
58
+ retries: 0,
59
+ last_processed_at: Time.zone.now)
60
+ end
61
+
62
+ # Update all topics that aren't currently locked and have no messages
63
+ # waiting. It's OK if some messages get inserted in the middle of this
64
+ # because the point is that at least within a few milliseconds of each
65
+ # other, it wasn't locked and had no messages, meaning the topic
66
+ # was in a good state.
67
+ # @param except_topics [Array<String>] the list of topics we've just
68
+ # realized had messages in them, meaning all other topics were empty.
69
+ def ping_empty_topics(except_topics)
70
+ records = KafkaTopicInfo.where(locked_by: nil).
71
+ where('topic not in(?)', except_topics)
72
+ records.each do |info|
73
+ info.update_attribute(:last_processed_at, Time.zone.now)
74
+ end
56
75
  end
57
76
 
58
77
  # The producer calls this if it gets an error sending messages. This
@@ -33,6 +33,30 @@ module Deimos
33
33
  decode(payload, schema: @key_schema['name'])[field_name]
34
34
  end
35
35
 
36
+ # :nodoc:
37
+ def sql_type(field)
38
+ type = field.type.type
39
+ return type if %w(array map record).include?(type)
40
+
41
+ if type == :union
42
+ non_null = field.type.schemas.reject { |f| f.type == :null }
43
+ if non_null.size > 1
44
+ warn("WARNING: #{field.name} has more than one non-null type. Picking the first for the SQL type.")
45
+ end
46
+ return non_null.first.type
47
+ end
48
+ return type.to_sym if %w(float boolean).include?(type)
49
+ return :integer if type == 'int'
50
+ return :bigint if type == 'long'
51
+
52
+ if type == 'double'
53
+ warn('Avro `double` type turns into SQL `float` type. Please ensure you have the correct `limit` set.')
54
+ return :float
55
+ end
56
+
57
+ :string
58
+ end
59
+
36
60
  # @override
37
61
  def coerce_field(field, value)
38
62
  AvroSchemaCoercer.new(avro_schema).coerce_type(field.type, value)
@@ -40,7 +64,10 @@ module Deimos
40
64
 
41
65
  # @override
42
66
  def schema_fields
43
- avro_schema.fields.map { |field| SchemaField.new(field.name, field.type) }
67
+ avro_schema.fields.map do |field|
68
+ enum_values = field.type.type == 'enum' ? field.type.symbols : []
69
+ SchemaField.new(field.name, field.type, enum_values)
70
+ end
44
71
  end
45
72
 
46
73
  # @override
@@ -55,6 +82,11 @@ module Deimos
55
82
  :avro_validation
56
83
  end
57
84
 
85
+ # @override
86
+ def self.content_type
87
+ 'avro/binary'
88
+ end
89
+
58
90
  private
59
91
 
60
92
  # @param schema [String]
@@ -10,18 +10,37 @@ module Deimos
10
10
  @schema = schema
11
11
  end
12
12
 
13
- # @param type [Symbol]
13
+ # Coerce sub-records in a payload to match the schema.
14
+ # @param type [Avro::Schema::UnionSchema]
15
+ # @param val [Object]
16
+ # @return [Object]
17
+ def coerce_union(type, val)
18
+ union_types = type.schemas.map { |s| s.type.to_sym }
19
+ return nil if val.nil? && union_types.include?(:null)
20
+
21
+ schema_type = type.schemas.find { |s| s.type.to_sym != :null }
22
+ coerce_type(schema_type, val)
23
+ end
24
+
25
+ # Coerce sub-records in a payload to match the schema.
26
+ # @param type [Avro::Schema::RecordSchema]
27
+ # @param val [Object]
28
+ # @return [Object]
29
+ def coerce_record(type, val)
30
+ record = val.map do |name, value|
31
+ field = type.fields.find { |f| f.name == name }
32
+ coerce_type(field.type, value)
33
+ end
34
+ val.keys.zip(record).to_h
35
+ end
36
+
37
+ # Coerce values in a payload to match the schema.
38
+ # @param type [Avro::Schema]
14
39
  # @param val [Object]
15
40
  # @return [Object]
16
41
  def coerce_type(type, val)
17
42
  int_classes = [Time, ActiveSupport::TimeWithZone]
18
43
  field_type = type.type.to_sym
19
- if field_type == :union
20
- union_types = type.schemas.map { |s| s.type.to_sym }
21
- return nil if val.nil? && union_types.include?(:null)
22
-
23
- field_type = union_types.find { |t| t != :null }
24
- end
25
44
 
26
45
  case field_type
27
46
  when :int, :long
@@ -32,14 +51,12 @@ module Deimos
32
51
  else
33
52
  val # this will fail
34
53
  end
35
-
36
54
  when :float, :double
37
55
  if val.is_a?(Numeric) || _is_float_string?(val)
38
56
  val.to_f
39
57
  else
40
58
  val # this will fail
41
59
  end
42
-
43
60
  when :string
44
61
  if val.respond_to?(:to_str)
45
62
  val.to_s
@@ -54,6 +71,10 @@ module Deimos
54
71
  else
55
72
  true
56
73
  end
74
+ when :union
75
+ coerce_union(type, val)
76
+ when :record
77
+ coerce_record(type, val)
57
78
  else
58
79
  val
59
80
  end
@@ -3,13 +3,15 @@
3
3
  module Deimos
4
4
  # Represents a field in the schema.
5
5
  class SchemaField
6
- attr_accessor :name, :type
6
+ attr_accessor :name, :type, :enum_values
7
7
 
8
8
  # @param name [String]
9
9
  # @param type [Object]
10
- def initialize(name, type)
10
+ # @param enum_values [Array<String>]
11
+ def initialize(name, type, enum_values=[])
11
12
  @name = name
12
13
  @type = type
14
+ @enum_values = enum_values
13
15
  end
14
16
  end
15
17
 
@@ -69,6 +71,12 @@ module Deimos
69
71
  :mock
70
72
  end
71
73
 
74
+ # The content type to use when encoding / decoding requests over HTTP via ActionController.
75
+ # @return [String]
76
+ def self.content_type
77
+ raise NotImplementedError
78
+ end
79
+
72
80
  # Encode a payload. To be defined by subclass.
73
81
  # @param payload [Hash]
74
82
  # @param schema [Symbol|String]
@@ -109,6 +117,17 @@ module Deimos
109
117
  raise NotImplementedError
110
118
  end
111
119
 
120
+ # Given a field definition, return the SQL type that might be used in
121
+ # ActiveRecord table creation - e.g. for Avro, a `long` type would
122
+ # return `:bigint`. There are also special values that need to be returned:
123
+ # `:array`, `:map` and `:record`, for types representing those structures.
124
+ # `:enum` is also recognized.
125
+ # @param field [SchemaField]
126
+ # @return [Symbol]
127
+ def sql_type(field)
128
+ raise NotImplementedError
129
+ end
130
+
112
131
  # Encode a message key. To be defined by subclass.
113
132
  # @param key [String|Hash] the value to use as the key.
114
133
  # @param key_id [Symbol|String] the field name of the key.
@@ -9,6 +9,8 @@ module Deimos
9
9
  attr_accessor :id, :current_topic
10
10
 
11
11
  BATCH_SIZE = 1000
12
+ DELETE_BATCH_SIZE = 10
13
+ MAX_DELETE_ATTEMPTS = 3
12
14
 
13
15
  # @param logger [Logger]
14
16
  def initialize(logger=Logger.new(STDOUT))
@@ -48,6 +50,7 @@ module Deimos
48
50
  topics = retrieve_topics
49
51
  @logger.info("Found topics: #{topics}")
50
52
  topics.each(&method(:process_topic))
53
+ KafkaTopicInfo.ping_empty_topics(topics)
51
54
  sleep(0.5)
52
55
  end
53
56
 
@@ -87,13 +90,13 @@ module Deimos
87
90
  begin
88
91
  produce_messages(compacted_messages.map(&:phobos_message))
89
92
  rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
93
+ delete_messages(messages)
90
94
  @logger.error('Message batch too large, deleting...')
91
95
  @logger.error(Deimos::KafkaMessage.decoded(messages))
92
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
93
96
  raise
94
97
  end
95
98
  end
96
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
99
+ delete_messages(messages)
97
100
  Deimos.config.metrics&.increment(
98
101
  'db_producer.process',
99
102
  tags: %W(topic:#{@current_topic}),
@@ -106,6 +109,27 @@ module Deimos
106
109
  true
107
110
  end
108
111
 
112
+ # @param messages [Array<Deimos::KafkaMessage>]
113
+ def delete_messages(messages)
114
+ attempts = 1
115
+ begin
116
+ messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
117
+ Deimos::KafkaMessage.where(topic: batch.first.topic,
118
+ id: batch.map(&:id)).
119
+ delete_all
120
+ end
121
+ rescue StandardError => e
122
+ if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
123
+ attempts <= MAX_DELETE_ATTEMPTS
124
+ attempts += 1
125
+ ActiveRecord::Base.connection.verify!
126
+ sleep(1)
127
+ retry
128
+ end
129
+ raise
130
+ end
131
+ end
132
+
109
133
  # @return [Array<Deimos::KafkaMessage>]
110
134
  def retrieve_messages
111
135
  KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
@@ -126,19 +150,33 @@ module Deimos
126
150
  metrics = Deimos.config.metrics
127
151
  return unless metrics
128
152
 
153
+ topics = KafkaTopicInfo.select(%w(topic last_processed_at))
129
154
  messages = Deimos::KafkaMessage.
130
155
  select('count(*) as num_messages, min(created_at) as earliest, topic').
131
- group(:topic)
132
- if messages.none?
133
- metrics.gauge('pending_db_messages_max_wait', 0)
134
- end
135
- messages.each do |record|
136
- earliest = record.earliest
137
- # SQLite gives a string here
138
- earliest = Time.zone.parse(earliest) if earliest.is_a?(String)
139
-
140
- time_diff = Time.zone.now - earliest
141
- metrics.gauge('pending_db_messages_max_wait', time_diff,
156
+ group(:topic).
157
+ index_by(&:topic)
158
+ topics.each do |record|
159
+ message_record = messages[record.topic]
160
+ # We want to record the last time we saw any activity, meaning either
161
+ # the oldest message, or the last time we processed, whichever comes
162
+ # last.
163
+ if message_record
164
+ record_earliest = record.earliest
165
+ # SQLite gives a string here
166
+ if record_earliest.is_a?(String)
167
+ record_earliest = Time.zone.parse(record_earliest)
168
+ end
169
+
170
+ earliest = [record.last_processed_at, record_earliest].max
171
+ time_diff = Time.zone.now - earliest
172
+ metrics.gauge('pending_db_messages_max_wait', time_diff,
173
+ tags: ["topic:#{record.topic}"])
174
+ else
175
+ # no messages waiting
176
+ metrics.gauge('pending_db_messages_max_wait', 0,
177
+ tags: ["topic:#{record.topic}"])
178
+ end
179
+ metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
142
180
  tags: ["topic:#{record.topic}"])
143
181
  end
144
182
  end
@@ -174,11 +212,11 @@ module Deimos
174
212
  end
175
213
 
176
214
  @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
177
- if batch_size < 10
178
- batch_size = 1
179
- else
180
- batch_size /= 10
181
- end
215
+ batch_size = if batch_size < 10
216
+ 1
217
+ else
218
+ (batch_size / 10).to_i
219
+ end
182
220
  shutdown_producer
183
221
  retry
184
222
  end
@@ -187,7 +225,7 @@ module Deimos
187
225
  # @param batch [Array<Deimos::KafkaMessage>]
188
226
  # @return [Array<Deimos::KafkaMessage>]
189
227
  def compact_messages(batch)
190
- return batch unless batch.first&.key.present?
228
+ return batch if batch.first&.key.blank?
191
229
 
192
230
  topic = batch.first.topic
193
231
  return batch if config.compact_topics != :all &&
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Utils
5
+ # Mixin to automatically decode schema-encoded payloads when given the correct content type,
6
+ # and provide the `render_schema` method to encode the payload for responses.
7
+ module SchemaControllerMixin
8
+ extend ActiveSupport::Concern
9
+
10
+ included do
11
+ Mime::Type.register('avro/binary', :avro)
12
+
13
+ attr_accessor :payload
14
+
15
+ if respond_to?(:before_filter)
16
+ before_filter(:decode_schema, if: :schema_format?)
17
+ else
18
+ before_action(:decode_schema, if: :schema_format?)
19
+ end
20
+ end
21
+
22
+ # :nodoc:
23
+ module ClassMethods
24
+ # @return [Hash<String, Hash<Symbol, String>>]
25
+ def schema_mapping
26
+ @schema_mapping ||= {}
27
+ end
28
+
29
+ # Indicate which schemas should be assigned to actions.
30
+ # @param actions [Symbol]
31
+ # @param request [String]
32
+ # @param response [String]
33
+ def schemas(*actions, request: nil, response: nil)
34
+ actions.each do |action|
35
+ request ||= action.to_s.titleize
36
+ response ||= action.to_s.titleize
37
+ schema_mapping[action.to_s] = { request: request, response: response }
38
+ end
39
+ end
40
+
41
+ # @return [Hash<Symbol, String>]
42
+ def namespaces
43
+ @namespaces ||= {}
44
+ end
45
+
46
+ # Set the namespace for both requests and responses.
47
+ # @param name [String]
48
+ def namespace(name)
49
+ request_namespace(name)
50
+ response_namespace(name)
51
+ end
52
+
53
+ # Set the namespace for requests.
54
+ # @param name [String]
55
+ def request_namespace(name)
56
+ namespaces[:request] = name
57
+ end
58
+
59
+ # Set the namespace for repsonses.
60
+ # @param name [String]
61
+ def response_namespace(name)
62
+ namespaces[:response] = name
63
+ end
64
+ end
65
+
66
+ # @return [Boolean]
67
+ def schema_format?
68
+ request.content_type == Deimos.schema_backend_class.content_type
69
+ end
70
+
71
+ # Get the namespace from either an existing instance variable, or tease it out of the schema.
72
+ # @param type [Symbol] :request or :response
73
+ # @return [Array<String, String>] the namespace and schema.
74
+ def parse_namespace(type)
75
+ namespace = self.class.namespaces[type]
76
+ schema = self.class.schema_mapping[params['action']][type]
77
+ if schema.nil?
78
+ raise "No #{type} schema defined for #{params[:controller]}##{params[:action]}!"
79
+ end
80
+
81
+ if namespace.nil?
82
+ last_period = schema.rindex('.')
83
+ namespace, schema = schema.split(last_period)
84
+ end
85
+ if namespace.nil? || schema.nil?
86
+ raise "No request namespace defined for #{params[:controller]}##{params[:action]}!"
87
+ end
88
+
89
+ [namespace, schema]
90
+ end
91
+
92
+ # Decode the payload with the parameters.
93
+ def decode_schema
94
+ namespace, schema = parse_namespace(:request)
95
+ decoder = Deimos.schema_backend(schema: schema, namespace: namespace)
96
+ @payload = decoder.decode(request.body.read).with_indifferent_access
97
+ request.body.rewind if request.body.respond_to?(:rewind)
98
+ end
99
+
100
+ # Render a hash into a payload as specified by the configured schema and namespace.
101
+ # @param payload [Hash]
102
+ def render_schema(payload, schema: nil, namespace: nil)
103
+ namespace, schema = parse_namespace(:response) if !schema && !namespace
104
+ encoder = Deimos.schema_backend(schema: schema, namespace: namespace)
105
+ encoded = encoder.encode(payload)
106
+ response.headers['Content-Type'] = encoder.class.content_type
107
+ send_data(encoded)
108
+ end
109
+ end
110
+ end
111
+ end