deimos-ruby 1.6.2 → 1.8.0.pre.beta2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +31 -0
  6. data/Gemfile.lock +43 -36
  7. data/README.md +141 -16
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +2 -1
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +150 -0
  22. data/lib/deimos/consume/message_consumption.rb +94 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_topic_info.rb +1 -1
  26. data/lib/deimos/message.rb +6 -1
  27. data/lib/deimos/metrics/provider.rb +0 -2
  28. data/lib/deimos/poll_info.rb +9 -0
  29. data/lib/deimos/tracing/provider.rb +0 -2
  30. data/lib/deimos/utils/db_poller.rb +149 -0
  31. data/lib/deimos/utils/db_producer.rb +8 -3
  32. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  33. data/lib/deimos/utils/lag_reporter.rb +19 -26
  34. data/lib/deimos/version.rb +1 -1
  35. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  36. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  37. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  38. data/lib/tasks/deimos.rake +7 -0
  39. data/spec/active_record_batch_consumer_spec.rb +481 -0
  40. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  41. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  42. data/spec/active_record_consumer_spec.rb +3 -11
  43. data/spec/active_record_producer_spec.rb +66 -88
  44. data/spec/batch_consumer_spec.rb +24 -7
  45. data/spec/config/configuration_spec.rb +4 -0
  46. data/spec/consumer_spec.rb +8 -8
  47. data/spec/deimos_spec.rb +57 -49
  48. data/spec/handlers/my_batch_consumer.rb +6 -1
  49. data/spec/handlers/my_consumer.rb +6 -1
  50. data/spec/message_spec.rb +19 -0
  51. data/spec/producer_spec.rb +3 -3
  52. data/spec/rake_spec.rb +1 -1
  53. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  54. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  55. data/spec/spec_helper.rb +61 -6
  56. data/spec/utils/db_poller_spec.rb +320 -0
  57. data/spec/utils/deadlock_retry_spec.rb +74 -0
  58. data/spec/utils/lag_reporter_spec.rb +29 -22
  59. metadata +55 -20
  60. data/lib/deimos/base_consumer.rb +0 -104
  61. data/lib/deimos/utils/executor.rb +0 -124
  62. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  63. data/lib/deimos/utils/signal_handler.rb +0 -68
  64. data/spec/utils/executor_spec.rb +0 -53
  65. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,94 @@
# frozen_string_literal: true

module Deimos
  module Consume
    # Methods used by message-by-message (non-batch) consumers. These consumers
    # are invoked for every individual message.
    module MessageConsumption
      extend ActiveSupport::Concern
      include Phobos::Handler

      # Phobos entry point: decode the payload/key, emit metrics around the
      # user-supplied `consume`, and route any error to `_handle_error`.
      # :nodoc:
      def around_consume(payload, metadata)
        decoded_payload = payload.dup
        new_metadata = metadata.dup
        benchmark = Benchmark.measure do
          _with_span do
            new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
            decoded_payload = payload ? self.class.decoder.decode(payload) : nil
            _received_message(decoded_payload, new_metadata)
            yield decoded_payload, new_metadata
          end
        end
        _handle_success(benchmark.real, decoded_payload, new_metadata)
      rescue StandardError => e
        _handle_error(e, decoded_payload, new_metadata)
      end

      # Consume incoming messages. Subclasses must override this.
      # @param _payload [String]
      # @param _metadata [Hash]
      def consume(_payload, _metadata)
        raise NotImplementedError
      end

      private

      # Log the received message and bump the "received" counter.
      # @param payload [Hash]
      # @param metadata [Hash]
      def _received_message(payload, metadata)
        Deimos.config.logger.info(
          message: 'Got Kafka event',
          payload: payload,
          metadata: metadata
        )
        Deimos.config.metrics&.increment('handler', tags: %W(
          status:received
          topic:#{metadata[:topic]}
        ))
        _report_time_delayed(payload, metadata)
      end

      # Record metrics/logs for a failed message, then delegate the
      # raise/swallow decision to `_error` (defined on the consumer class).
      # @param exception [Throwable]
      # @param payload [Hash]
      # @param metadata [Hash]
      def _handle_error(exception, payload, metadata)
        Deimos.config.metrics&.increment(
          'handler',
          tags: %W(
            status:error
            topic:#{metadata[:topic]}
          )
        )
        Deimos.config.logger.warn(
          message: 'Error consuming message',
          handler: self.class.name,
          metadata: metadata,
          data: payload,
          error_message: exception.message,
          error: exception.backtrace
        )

        _error(exception, payload, metadata)
      end

      # Record timing/success metrics and log completion.
      # @param time_taken [Float]
      # @param payload [Hash]
      # @param metadata [Hash]
      def _handle_success(time_taken, payload, metadata)
        Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
          time:consume
          topic:#{metadata[:topic]}
        ))
        Deimos.config.metrics&.increment('handler', tags: %W(
          status:success
          topic:#{metadata[:topic]}
        ))
        Deimos.config.logger.info(
          message: 'Finished processing Kafka event',
          payload: payload,
          time_elapsed: time_taken,
          metadata: metadata
        )
      end
    end
  end
end
@@ -1,94 +1,104 @@
# frozen_string_literal: true

require 'deimos/consume/batch_consumption'
require 'deimos/consume/message_consumption'

# Class to consume messages coming from a Kafka topic
# Note: According to the docs, instances of your handler will be created
# for every incoming message/batch. This class should be lightweight.
module Deimos
  # Basic consumer class. Inherit from this class and override either consume
  # or consume_batch, depending on the delivery mode of your listener.
  # `consume` -> use `delivery :message` or `delivery :batch`
  # `consume_batch` -> use `delivery :inline_batch`
  class Consumer
    include Consume::MessageConsumption
    include Consume::BatchConsumption
    include SharedConfig

    class << self
      # Memoized backend used to decode message payloads.
      # @return [Deimos::SchemaBackends::Base]
      def decoder
        @decoder ||= Deimos.schema_backend(schema: config[:schema],
                                           namespace: config[:namespace])
      end

      # Memoized backend used to decode message keys (when a key schema is set).
      # @return [Deimos::SchemaBackends::Base]
      def key_decoder
        @key_decoder ||= Deimos.schema_backend(schema: config[:key_schema],
                                               namespace: config[:namespace])
      end
    end

    # Helper method to decode an encoded key.
    # @param key [String]
    # @return [Object] the decoded key.
    def decode_key(key)
      return nil if key.nil?

      config = self.class.config
      unless config[:key_configured]
        raise 'No key config given - if you are not decoding keys, please use '\
          '`key_config plain: true`'
      end

      if config[:key_field]
        self.class.decoder.decode_key(key, config[:key_field])
      elsif config[:key_schema]
        self.class.key_decoder.decode(key, schema: config[:key_schema])
      else # no encoding
        key
      end
    end

    private

    # Wrap the given block in a tracing span; always finish the span, even on
    # error, so traces are not leaked.
    def _with_span
      @span = Deimos.config.tracer&.start(
        'deimos-consumer',
        resource: self.class.name.gsub('::', '-')
      )
      yield
    ensure
      Deimos.config.tracer&.finish(@span)
    end

    # Emit a histogram of how far behind "now" the message's embedded
    # timestamp is. Unparseable timestamps are logged and skipped.
    def _report_time_delayed(payload, metadata)
      return if payload.nil? || payload['timestamp'].blank?

      begin
        time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
      rescue ArgumentError
        Deimos.config.logger.info(
          message: "Error parsing timestamp! #{payload['timestamp']}"
        )
        return
      end
      Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
        time:time_delayed
        topic:#{metadata[:topic]}
      ))
    end

    # Overrideable method to determine if a given error should be considered
    # "fatal" and always be reraised.
    # @param _error [Exception]
    # @param _payload [Hash]
    # @param _metadata [Hash]
    # @return [Boolean]
    def fatal_error?(_error, _payload, _metadata)
      false
    end

    # Mark the span as errored and re-raise (bare `raise` re-raises the
    # exception currently being rescued) when configuration or the
    # fatal-error hooks demand it; otherwise swallow the error.
    # @param exception [Exception]
    # @param payload [Hash]
    # @param metadata [Hash]
    def _error(exception, payload, metadata)
      Deimos.config.tracer&.set_error(@span, exception)

      raise if Deimos.config.consumers.reraise_errors ||
               Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) ||
               fatal_error?(exception, payload, metadata)
    end
  end
end
@@ -42,7 +42,7 @@ module Deimos
42
42
  messages.map do |m|
43
43
  {
44
44
  key: m.key.present? ? decoder&.decode_key(m.key) || m.key : nil,
45
- payload: decoder&.decoder&.decode(self.message) || self.message
45
+ payload: decoder&.decoder&.decode(m.message) || m.message
46
46
  }
47
47
  end
48
48
  end
@@ -14,7 +14,7 @@ module Deimos
14
14
  # Try to create it - it's fine if it already exists
15
15
  begin
16
16
  self.create(topic: topic)
17
- rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/SuppressedException
17
+ rescue ActiveRecord::RecordNotUnique
18
18
  # continue on
19
19
  end
20
20
 
@@ -10,7 +10,7 @@ module Deimos
10
10
  # @param producer [Class]
11
11
  def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
12
12
  @payload = payload&.with_indifferent_access
13
- @producer_name = producer.name
13
+ @producer_name = producer&.name
14
14
  @topic = topic
15
15
  @key = key
16
16
  @partition_key = partition_key
@@ -70,5 +70,10 @@ module Deimos
70
70
  def ==(other)
71
71
  self.to_h == other.to_h
72
72
  end
73
+
74
+ # @return [Boolean] True if this message is a tombstone
75
+ def tombstone?
76
+ payload.nil?
77
+ end
73
78
  end
74
79
  end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Lint/UnusedMethodArgument
4
3
  module Deimos
5
4
  module Metrics
6
5
  # Base class for all metrics providers.
@@ -35,4 +34,3 @@ module Deimos
35
34
  end
36
35
  end
37
36
  end
38
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,9 @@
# frozen_string_literal: true

module Deimos
  # ActiveRecord class to record the last time we polled the database.
  # For use with DbPoller; backed by the `deimos_poll_info` table.
  class PollInfo < ActiveRecord::Base
    self.table_name = 'deimos_poll_info'
  end
end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Lint/UnusedMethodArgument
4
3
  module Deimos
5
4
  module Tracing
6
5
  # Base class for all tracing providers.
@@ -28,4 +27,3 @@ module Deimos
28
27
  end
29
28
  end
30
29
  end
31
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,149 @@
# frozen_string_literal: true

require 'deimos/poll_info'
require 'sigurd/executor'
require 'sigurd/signal_handler'

module Deimos
  module Utils
    # Class which continually polls the database and sends Kafka messages.
    class DbPoller
      # Maximum number of records fetched per query.
      BATCH_SIZE = 1000

      # Needed for Executor so it can identify the worker
      attr_reader :id

      # Begin the DB Poller process: build one poller per configured
      # db_poller object and run them under a signal-handling executor.
      def self.start!
        if Deimos.config.db_poller_objects.empty?
          raise('No pollers configured!')
        end

        pollers = Deimos.config.db_poller_objects.map do |poller_config|
          self.new(poller_config)
        end
        executor = Sigurd::Executor.new(pollers,
                                        sleep_seconds: 5,
                                        logger: Deimos.config.logger)
        signal_handler = Sigurd::SignalHandler.new(executor)
        signal_handler.run!
      end

      # @param config [Deimos::Configuration::ConfigStruct]
      def initialize(config)
        @config = config
        @id = SecureRandom.hex
        begin
          @producer = @config.producer_class.constantize
        rescue NameError
          raise "Class #{@config.producer_class} not found!"
        end
        unless @producer < Deimos::ActiveRecordProducer
          # Bug fix: was `@producer.class.name`, which is always "Class" since
          # @producer is itself a Class object.
          raise "Class #{@producer.name} is not an ActiveRecordProducer!"
        end
      end

      # Start the poll:
      # 1) Grab the current PollInfo from the database indicating the last
      # time we ran
      # 2) On a loop, process all the recent updates between the last time
      # we ran and now.
      def start
        # Don't send asynchronously
        if Deimos.config.producers.backend == :kafka_async
          Deimos.config.producers.backend = :kafka
        end
        Deimos.config.logger.info('Starting...')
        @signal_to_stop = false
        retrieve_poll_info
        loop do
          if @signal_to_stop
            Deimos.config.logger.info('Shutting down')
            break
          end
          process_updates
          sleep 0.1
        end
      end

      # Grab the PollInfo or create if it doesn't exist.
      def retrieve_poll_info
        ActiveRecord::Base.connection.reconnect!
        new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
        @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
                Deimos::PollInfo.create!(producer: @config.producer_class,
                                         last_sent: new_time,
                                         last_sent_id: 0)
      end

      # Stop the poll.
      def stop
        Deimos.config.logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Indicate whether this current loop should process updates. Most loops
      # will busy-wait (sleeping 0.1 seconds) until it's ready.
      # @return [Boolean]
      def should_run?
        Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
      end

      # @param record [ActiveRecord::Base]
      # @return [ActiveSupport::TimeWithZone] value of the configured
      #   timestamp column for the record.
      def last_updated(record)
        record.public_send(@config.timestamp_column)
      end

      # Send messages for updated data.
      def process_updates
        return unless should_run?

        time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
        time_to = Time.zone.now - @config.delay_time
        Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
        message_count = 0
        batch_count = 0

        # poll_query gets all the relevant data from the database, as defined
        # by the producer itself.
        loop do
          Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
          batch = fetch_results(time_from, time_to).to_a
          break if batch.empty?

          batch_count += 1
          process_batch(batch)
          message_count += batch.size
          time_from = last_updated(batch.last)
        end
        # Bug fix: the parenthesized summary previously ended with a stray
        # "}" instead of ")".
        Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
      end

      # @param time_from [ActiveSupport::TimeWithZone]
      # @param time_to [ActiveSupport::TimeWithZone]
      # @return [ActiveRecord::Relation]
      def fetch_results(time_from, time_to)
        id = @producer.config[:record_class].primary_key
        quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
        quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
        @producer.poll_query(time_from: time_from,
                             time_to: time_to,
                             column_name: @config.timestamp_column,
                             min_id: @info.last_sent_id).
          limit(BATCH_SIZE).
          order("#{quoted_timestamp}, #{quoted_id}")
      end

      # Publish a batch of records and advance the PollInfo watermark to the
      # last record's id/timestamp so a crash resumes from that point.
      # @param batch [Array<ActiveRecord::Base>]
      def process_batch(batch)
        record = batch.last
        id_method = record.class.primary_key
        last_id = record.public_send(id_method)
        last_updated_at = last_updated(record)
        @producer.send_events(batch)
        @info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
      end
    end
  end
end