deimos-ruby 1.7.0.pre.beta1 → 1.8.1.pre.beta3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -4
  3. data/CHANGELOG.md +50 -0
  4. data/Gemfile.lock +109 -75
  5. data/README.md +147 -16
  6. data/deimos-ruby.gemspec +4 -2
  7. data/docs/ARCHITECTURE.md +144 -0
  8. data/docs/CONFIGURATION.md +4 -0
  9. data/lib/deimos.rb +8 -7
  10. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  11. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  12. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  13. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  14. data/lib/deimos/active_record_consumer.rb +33 -75
  15. data/lib/deimos/batch_consumer.rb +2 -142
  16. data/lib/deimos/config/configuration.rb +8 -10
  17. data/lib/deimos/consume/batch_consumption.rb +150 -0
  18. data/lib/deimos/consume/message_consumption.rb +94 -0
  19. data/lib/deimos/consumer.rb +79 -72
  20. data/lib/deimos/instrumentation.rb +10 -5
  21. data/lib/deimos/kafka_message.rb +1 -1
  22. data/lib/deimos/kafka_topic_info.rb +21 -2
  23. data/lib/deimos/message.rb +6 -1
  24. data/lib/deimos/schema_backends/avro_base.rb +33 -1
  25. data/lib/deimos/schema_backends/avro_schema_coercer.rb +30 -11
  26. data/lib/deimos/schema_backends/base.rb +21 -2
  27. data/lib/deimos/utils/db_poller.rb +6 -6
  28. data/lib/deimos/utils/db_producer.rb +57 -15
  29. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  30. data/lib/deimos/utils/lag_reporter.rb +19 -26
  31. data/lib/deimos/utils/schema_controller_mixin.rb +111 -0
  32. data/lib/deimos/version.rb +1 -1
  33. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  34. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  35. data/lib/generators/deimos/active_record_generator.rb +79 -0
  36. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  37. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  38. data/spec/active_record_batch_consumer_spec.rb +481 -0
  39. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  40. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  41. data/spec/active_record_consumer_spec.rb +3 -11
  42. data/spec/batch_consumer_spec.rb +24 -7
  43. data/spec/config/configuration_spec.rb +4 -0
  44. data/spec/consumer_spec.rb +6 -6
  45. data/spec/deimos_spec.rb +57 -49
  46. data/spec/generators/active_record_generator_spec.rb +56 -0
  47. data/spec/handlers/my_batch_consumer.rb +6 -1
  48. data/spec/handlers/my_consumer.rb +6 -1
  49. data/spec/kafka_listener_spec.rb +54 -0
  50. data/spec/kafka_topic_info_spec.rb +39 -16
  51. data/spec/message_spec.rb +19 -0
  52. data/spec/producer_spec.rb +34 -0
  53. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  54. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +55 -0
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  58. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  59. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  60. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  61. data/spec/spec_helper.rb +24 -0
  62. data/spec/utils/db_poller_spec.rb +2 -2
  63. data/spec/utils/db_producer_spec.rb +84 -10
  64. data/spec/utils/deadlock_retry_spec.rb +74 -0
  65. data/spec/utils/lag_reporter_spec.rb +29 -22
  66. data/spec/utils/schema_controller_mixin_spec.rb +68 -0
  67. metadata +87 -30
  68. data/lib/deimos/base_consumer.rb +0 -100
  69. data/lib/deimos/utils/executor.rb +0 -124
  70. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  71. data/lib/deimos/utils/signal_handler.rb +0 -68
  72. data/spec/utils/executor_spec.rb +0 -53
  73. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Consume
5
+ # Methods used by message-by-message (non-batch) consumers. These consumers
6
+ # are invoked for every individual message.
7
+ module MessageConsumption
8
+ extend ActiveSupport::Concern
9
+ include Phobos::Handler
10
+
11
+ # :nodoc:
12
+ def around_consume(payload, metadata)
13
+ decoded_payload = payload.dup
14
+ new_metadata = metadata.dup
15
+ benchmark = Benchmark.measure do
16
+ _with_span do
17
+ new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
18
+ decoded_payload = payload ? self.class.decoder.decode(payload) : nil
19
+ _received_message(decoded_payload, new_metadata)
20
+ yield decoded_payload, new_metadata
21
+ end
22
+ end
23
+ _handle_success(benchmark.real, decoded_payload, new_metadata)
24
+ rescue StandardError => e
25
+ _handle_error(e, decoded_payload, new_metadata)
26
+ end
27
+
28
+ # Consume incoming messages.
29
+ # @param _payload [String]
30
+ # @param _metadata [Hash]
31
+ def consume(_payload, _metadata)
32
+ raise NotImplementedError
33
+ end
34
+
35
+ private
36
+
37
+ def _received_message(payload, metadata)
38
+ Deimos.config.logger.info(
39
+ message: 'Got Kafka event',
40
+ payload: payload,
41
+ metadata: metadata
42
+ )
43
+ Deimos.config.metrics&.increment('handler', tags: %W(
44
+ status:received
45
+ topic:#{metadata[:topic]}
46
+ ))
47
+ _report_time_delayed(payload, metadata)
48
+ end
49
+
50
+ # @param exception [Throwable]
51
+ # @param payload [Hash]
52
+ # @param metadata [Hash]
53
+ def _handle_error(exception, payload, metadata)
54
+ Deimos.config.metrics&.increment(
55
+ 'handler',
56
+ tags: %W(
57
+ status:error
58
+ topic:#{metadata[:topic]}
59
+ )
60
+ )
61
+ Deimos.config.logger.warn(
62
+ message: 'Error consuming message',
63
+ handler: self.class.name,
64
+ metadata: metadata,
65
+ data: payload,
66
+ error_message: exception.message,
67
+ error: exception.backtrace
68
+ )
69
+
70
+ _error(exception, payload, metadata)
71
+ end
72
+
73
+ # @param time_taken [Float]
74
+ # @param payload [Hash]
75
+ # @param metadata [Hash]
76
+ def _handle_success(time_taken, payload, metadata)
77
+ Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
78
+ time:consume
79
+ topic:#{metadata[:topic]}
80
+ ))
81
+ Deimos.config.metrics&.increment('handler', tags: %W(
82
+ status:success
83
+ topic:#{metadata[:topic]}
84
+ ))
85
+ Deimos.config.logger.info(
86
+ message: 'Finished processing Kafka event',
87
+ payload: payload,
88
+ time_elapsed: time_taken,
89
+ metadata: metadata
90
+ )
91
+ end
92
+ end
93
+ end
94
+ end
@@ -1,97 +1,104 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'deimos/base_consumer'
4
- require 'deimos/shared_config'
5
- require 'phobos/handler'
6
- require 'active_support/all'
3
+ require 'deimos/consume/batch_consumption'
4
+ require 'deimos/consume/message_consumption'
7
5
 
8
- # Class to consume messages coming from the pipeline topic
6
+ # Class to consume messages coming from a Kafka topic
9
7
  # Note: According to the docs, instances of your handler will be created
10
- # for every incoming message. This class should be lightweight.
8
+ # for every incoming message/batch. This class should be lightweight.
11
9
  module Deimos
12
- # Parent consumer class.
13
- class Consumer < BaseConsumer
14
- include Phobos::Handler
10
+ # Basic consumer class. Inherit from this class and override either consume
11
+ # or consume_batch, depending on the delivery mode of your listener.
12
+ # `consume` -> use `delivery :message` or `delivery :batch`
13
+ # `consume_batch` -> use `delivery :inline_batch`
14
+ class Consumer
15
+ include Consume::MessageConsumption
16
+ include Consume::BatchConsumption
17
+ include SharedConfig
15
18
 
16
- # :nodoc:
17
- def around_consume(payload, metadata)
18
- decoded_payload = payload.dup
19
- new_metadata = metadata.dup
20
- benchmark = Benchmark.measure do
21
- _with_span do
22
- new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
23
- decoded_payload = payload ? self.class.decoder.decode(payload) : nil
24
- _received_message(decoded_payload, new_metadata)
25
- yield decoded_payload, new_metadata
26
- end
19
+ class << self
20
+ # @return [Deimos::SchemaBackends::Base]
21
+ def decoder
22
+ @decoder ||= Deimos.schema_backend(schema: config[:schema],
23
+ namespace: config[:namespace])
24
+ end
25
+
26
+ # @return [Deimos::SchemaBackends::Base]
27
+ def key_decoder
28
+ @key_decoder ||= Deimos.schema_backend(schema: config[:key_schema],
29
+ namespace: config[:namespace])
27
30
  end
28
- _handle_success(benchmark.real, decoded_payload, new_metadata)
29
- rescue StandardError => e
30
- _handle_error(e, decoded_payload, new_metadata)
31
31
  end
32
32
 
33
- # Consume incoming messages.
34
- # @param _payload [String]
35
- # @param _metadata [Hash]
36
- def consume(_payload, _metadata)
37
- raise NotImplementedError
33
+ # Helper method to decode an encoded key.
34
+ # @param key [String]
35
+ # @return [Object] the decoded key.
36
+ def decode_key(key)
37
+ return nil if key.nil?
38
+
39
+ config = self.class.config
40
+ unless config[:key_configured]
41
+ raise 'No key config given - if you are not decoding keys, please use '\
42
+ '`key_config plain: true`'
43
+ end
44
+
45
+ if config[:key_field]
46
+ self.class.decoder.decode_key(key, config[:key_field])
47
+ elsif config[:key_schema]
48
+ self.class.key_decoder.decode(key, schema: config[:key_schema])
49
+ else # no encoding
50
+ key
51
+ end
38
52
  end
39
53
 
40
54
  private
41
55
 
42
- def _received_message(payload, metadata)
43
- Deimos.config.logger.info(
44
- message: 'Got Kafka event',
45
- payload: payload,
46
- metadata: metadata
56
+ def _with_span
57
+ @span = Deimos.config.tracer&.start(
58
+ 'deimos-consumer',
59
+ resource: self.class.name.gsub('::', '-')
47
60
  )
48
- Deimos.config.metrics&.increment('handler', tags: %W(
49
- status:received
61
+ yield
62
+ ensure
63
+ Deimos.config.tracer&.finish(@span)
64
+ end
65
+
66
+ def _report_time_delayed(payload, metadata)
67
+ return if payload.nil? || payload['timestamp'].blank?
68
+
69
+ begin
70
+ time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
71
+ rescue ArgumentError
72
+ Deimos.config.logger.info(
73
+ message: "Error parsing timestamp! #{payload['timestamp']}"
74
+ )
75
+ return
76
+ end
77
+ Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
78
+ time:time_delayed
50
79
  topic:#{metadata[:topic]}
51
80
  ))
52
- _report_time_delayed(payload, metadata)
53
81
  end
54
82
 
55
- # @param exception [Throwable]
56
- # @param payload [Hash]
57
- # @param metadata [Hash]
58
- def _handle_error(exception, payload, metadata)
59
- Deimos.config.metrics&.increment(
60
- 'handler',
61
- tags: %W(
62
- status:error
63
- topic:#{metadata[:topic]}
64
- )
65
- )
66
- Deimos.config.logger.warn(
67
- message: 'Error consuming message',
68
- handler: self.class.name,
69
- metadata: metadata,
70
- data: payload,
71
- error_message: exception.message,
72
- error: exception.backtrace
73
- )
74
- super
83
+ # Overrideable method to determine if a given error should be considered
84
+ # "fatal" and always be reraised.
85
+ # @param _error [Exception]
86
+ # @param _payload [Hash]
87
+ # @param _metadata [Hash]
88
+ # @return [Boolean]
89
+ def fatal_error?(_error, _payload, _metadata)
90
+ false
75
91
  end
76
92
 
77
- # @param time_taken [Float]
93
+ # @param exception [Exception]
78
94
  # @param payload [Hash]
79
95
  # @param metadata [Hash]
80
- def _handle_success(time_taken, payload, metadata)
81
- Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
82
- time:consume
83
- topic:#{metadata[:topic]}
84
- ))
85
- Deimos.config.metrics&.increment('handler', tags: %W(
86
- status:success
87
- topic:#{metadata[:topic]}
88
- ))
89
- Deimos.config.logger.info(
90
- message: 'Finished processing Kafka event',
91
- payload: payload,
92
- time_elapsed: time_taken,
93
- metadata: metadata
94
- )
96
+ def _error(exception, payload, metadata)
97
+ Deimos.config.tracer&.set_error(@span, exception)
98
+
99
+ raise if Deimos.config.consumers.reraise_errors ||
100
+ Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) ||
101
+ fatal_error?(exception, payload, metadata)
95
102
  end
96
103
  end
97
104
  end
@@ -46,13 +46,18 @@ module Deimos
46
46
 
47
47
  messages = exception.failed_messages
48
48
  messages.group_by(&:topic).each do |topic, batch|
49
- next if batch.empty?
49
+ producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
50
+ next if batch.empty? || !producer
50
51
 
51
- producer = batch.first.metadata[:producer_name]
52
- payloads = batch.map { |m| m.metadata[:decoded_payload] }
52
+ decoder = Deimos.schema_backend(schema: producer.config[:schema],
53
+ namespace: producer.config[:namespace])
54
+ payloads = batch.map { |m| decoder.decode(m.value) }
53
55
 
54
- Deimos.config.metrics&.count('publish_error', payloads.size,
55
- tags: %W(topic:#{topic}))
56
+ Deimos.config.metrics&.increment(
57
+ 'publish_error',
58
+ tags: %W(topic:#{topic}),
59
+ by: payloads.size
60
+ )
56
61
  Deimos.instrument(
57
62
  'produce_error',
58
63
  producer: producer,
@@ -42,7 +42,7 @@ module Deimos
42
42
  messages.map do |m|
43
43
  {
44
44
  key: m.key.present? ? decoder&.decode_key(m.key) || m.key : nil,
45
- payload: decoder&.decoder&.decode(self.message) || self.message
45
+ payload: decoder&.decoder&.decode(m.message) || m.message
46
46
  }
47
47
  end
48
48
  end
@@ -13,7 +13,7 @@ module Deimos
13
13
  def lock(topic, lock_id)
14
14
  # Try to create it - it's fine if it already exists
15
15
  begin
16
- self.create(topic: topic)
16
+ self.create(topic: topic, last_processed_at: Time.zone.now)
17
17
  rescue ActiveRecord::RecordNotUnique
18
18
  # continue on
19
19
  end
@@ -52,7 +52,26 @@ module Deimos
52
52
  # @param lock_id [String]
53
53
  def clear_lock(topic, lock_id)
54
54
  self.where(topic: topic, locked_by: lock_id).
55
- update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
55
+ update_all(locked_by: nil,
56
+ locked_at: nil,
57
+ error: false,
58
+ retries: 0,
59
+ last_processed_at: Time.zone.now)
60
+ end
61
+
62
+ # Update all topics that aren't currently locked and have no messages
63
+ # waiting. It's OK if some messages get inserted in the middle of this
64
+ # because the point is that at least within a few milliseconds of each
65
+ # other, it wasn't locked and had no messages, meaning the topic
66
+ # was in a good state.
67
+ # @param except_topics [Array<String>] the list of topics we've just
68
+ # realized had messages in them, meaning all other topics were empty.
69
+ def ping_empty_topics(except_topics)
70
+ records = KafkaTopicInfo.where(locked_by: nil).
71
+ where('topic not in(?)', except_topics)
72
+ records.each do |info|
73
+ info.update_attribute(:last_processed_at, Time.zone.now)
74
+ end
56
75
  end
57
76
 
58
77
  # The producer calls this if it gets an error sending messages. This
@@ -10,7 +10,7 @@ module Deimos
10
10
  # @param producer [Class]
11
11
  def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
12
12
  @payload = payload&.with_indifferent_access
13
- @producer_name = producer.name
13
+ @producer_name = producer&.name
14
14
  @topic = topic
15
15
  @key = key
16
16
  @partition_key = partition_key
@@ -70,5 +70,10 @@ module Deimos
70
70
  def ==(other)
71
71
  self.to_h == other.to_h
72
72
  end
73
+
74
+ # @return [Boolean] True if this message is a tombstone
75
+ def tombstone?
76
+ payload.nil?
77
+ end
73
78
  end
74
79
  end
@@ -33,6 +33,30 @@ module Deimos
33
33
  decode(payload, schema: @key_schema['name'])[field_name]
34
34
  end
35
35
 
36
+ # :nodoc:
37
+ def sql_type(field)
38
+ type = field.type.type
39
+ return type if %w(array map record).include?(type)
40
+
41
+ if type == :union
42
+ non_null = field.type.schemas.reject { |f| f.type == :null }
43
+ if non_null.size > 1
44
+ warn("WARNING: #{field.name} has more than one non-null type. Picking the first for the SQL type.")
45
+ end
46
+ return non_null.first.type
47
+ end
48
+ return type.to_sym if %w(float boolean).include?(type)
49
+ return :integer if type == 'int'
50
+ return :bigint if type == 'long'
51
+
52
+ if type == 'double'
53
+ warn('Avro `double` type turns into SQL `float` type. Please ensure you have the correct `limit` set.')
54
+ return :float
55
+ end
56
+
57
+ :string
58
+ end
59
+
36
60
  # @override
37
61
  def coerce_field(field, value)
38
62
  AvroSchemaCoercer.new(avro_schema).coerce_type(field.type, value)
@@ -40,7 +64,10 @@ module Deimos
40
64
 
41
65
  # @override
42
66
  def schema_fields
43
- avro_schema.fields.map { |field| SchemaField.new(field.name, field.type) }
67
+ avro_schema.fields.map do |field|
68
+ enum_values = field.type.type == 'enum' ? field.type.symbols : []
69
+ SchemaField.new(field.name, field.type, enum_values)
70
+ end
44
71
  end
45
72
 
46
73
  # @override
@@ -55,6 +82,11 @@ module Deimos
55
82
  :avro_validation
56
83
  end
57
84
 
85
+ # @override
86
+ def self.content_type
87
+ 'avro/binary'
88
+ end
89
+
58
90
  private
59
91
 
60
92
  # @param schema [String]
@@ -10,18 +10,37 @@ module Deimos
10
10
  @schema = schema
11
11
  end
12
12
 
13
- # @param type [Symbol]
13
+ # Coerce sub-records in a payload to match the schema.
14
+ # @param type [Avro::Schema::UnionSchema]
15
+ # @param val [Object]
16
+ # @return [Object]
17
+ def coerce_union(type, val)
18
+ union_types = type.schemas.map { |s| s.type.to_sym }
19
+ return nil if val.nil? && union_types.include?(:null)
20
+
21
+ schema_type = type.schemas.find { |s| s.type.to_sym != :null }
22
+ coerce_type(schema_type, val)
23
+ end
24
+
25
+ # Coerce sub-records in a payload to match the schema.
26
+ # @param type [Avro::Schema::RecordSchema]
27
+ # @param val [Object]
28
+ # @return [Object]
29
+ def coerce_record(type, val)
30
+ record = val.map do |name, value|
31
+ field = type.fields.find { |f| f.name == name }
32
+ coerce_type(field.type, value)
33
+ end
34
+ val.keys.zip(record).to_h
35
+ end
36
+
37
+ # Coerce values in a payload to match the schema.
38
+ # @param type [Avro::Schema]
14
39
  # @param val [Object]
15
40
  # @return [Object]
16
41
  def coerce_type(type, val)
17
42
  int_classes = [Time, ActiveSupport::TimeWithZone]
18
43
  field_type = type.type.to_sym
19
- if field_type == :union
20
- union_types = type.schemas.map { |s| s.type.to_sym }
21
- return nil if val.nil? && union_types.include?(:null)
22
-
23
- field_type = union_types.find { |t| t != :null }
24
- end
25
44
 
26
45
  case field_type
27
46
  when :int, :long
@@ -32,14 +51,12 @@ module Deimos
32
51
  else
33
52
  val # this will fail
34
53
  end
35
-
36
54
  when :float, :double
37
55
  if val.is_a?(Numeric) || _is_float_string?(val)
38
56
  val.to_f
39
57
  else
40
58
  val # this will fail
41
59
  end
42
-
43
60
  when :string
44
61
  if val.respond_to?(:to_str)
45
62
  val.to_s
@@ -54,8 +71,10 @@ module Deimos
54
71
  else
55
72
  true
56
73
  end
57
- else
58
- val
74
+ when :union
75
+ coerce_union(type, val)
76
+ when :record
77
+ coerce_record(type, val)
59
78
  end
60
79
  end
61
80