deimos-ruby 1.6.2 → 1.8.0.pre.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +31 -0
  6. data/Gemfile.lock +43 -36
  7. data/README.md +141 -16
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +2 -1
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +150 -0
  22. data/lib/deimos/consume/message_consumption.rb +94 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_topic_info.rb +1 -1
  26. data/lib/deimos/message.rb +6 -1
  27. data/lib/deimos/metrics/provider.rb +0 -2
  28. data/lib/deimos/poll_info.rb +9 -0
  29. data/lib/deimos/tracing/provider.rb +0 -2
  30. data/lib/deimos/utils/db_poller.rb +149 -0
  31. data/lib/deimos/utils/db_producer.rb +8 -3
  32. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  33. data/lib/deimos/utils/lag_reporter.rb +19 -26
  34. data/lib/deimos/version.rb +1 -1
  35. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  36. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  37. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  38. data/lib/tasks/deimos.rake +7 -0
  39. data/spec/active_record_batch_consumer_spec.rb +481 -0
  40. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  41. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  42. data/spec/active_record_consumer_spec.rb +3 -11
  43. data/spec/active_record_producer_spec.rb +66 -88
  44. data/spec/batch_consumer_spec.rb +24 -7
  45. data/spec/config/configuration_spec.rb +4 -0
  46. data/spec/consumer_spec.rb +8 -8
  47. data/spec/deimos_spec.rb +57 -49
  48. data/spec/handlers/my_batch_consumer.rb +6 -1
  49. data/spec/handlers/my_consumer.rb +6 -1
  50. data/spec/message_spec.rb +19 -0
  51. data/spec/producer_spec.rb +3 -3
  52. data/spec/rake_spec.rb +1 -1
  53. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  54. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  55. data/spec/spec_helper.rb +61 -6
  56. data/spec/utils/db_poller_spec.rb +320 -0
  57. data/spec/utils/deadlock_retry_spec.rb +74 -0
  58. data/spec/utils/lag_reporter_spec.rb +29 -22
  59. metadata +55 -20
  60. data/lib/deimos/base_consumer.rb +0 -104
  61. data/lib/deimos/utils/executor.rb +0 -124
  62. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  63. data/lib/deimos/utils/signal_handler.rb +0 -68
  64. data/spec/utils/executor_spec.rb +0 -53
  65. data/spec/utils/signal_handler_spec.rb +0 -16
--- /dev/null
+++ b/data/lib/deimos/consume/message_consumption.rb
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Consume
+    # Methods used by message-by-message (non-batch) consumers. These consumers
+    # are invoked for every individual message.
+    module MessageConsumption
+      extend ActiveSupport::Concern
+      include Phobos::Handler
+
+      # :nodoc:
+      def around_consume(payload, metadata)
+        decoded_payload = payload.dup
+        new_metadata = metadata.dup
+        benchmark = Benchmark.measure do
+          _with_span do
+            new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
+            decoded_payload = payload ? self.class.decoder.decode(payload) : nil
+            _received_message(decoded_payload, new_metadata)
+            yield decoded_payload, new_metadata
+          end
+        end
+        _handle_success(benchmark.real, decoded_payload, new_metadata)
+      rescue StandardError => e
+        _handle_error(e, decoded_payload, new_metadata)
+      end
+
+      # Consume incoming messages.
+      # @param _payload [String]
+      # @param _metadata [Hash]
+      def consume(_payload, _metadata)
+        raise NotImplementedError
+      end
+
+      private
+
+      def _received_message(payload, metadata)
+        Deimos.config.logger.info(
+          message: 'Got Kafka event',
+          payload: payload,
+          metadata: metadata
+        )
+        Deimos.config.metrics&.increment('handler', tags: %W(
+                                           status:received
+                                           topic:#{metadata[:topic]}
+                                         ))
+        _report_time_delayed(payload, metadata)
+      end
+
+      # @param exception [Throwable]
+      # @param payload [Hash]
+      # @param metadata [Hash]
+      def _handle_error(exception, payload, metadata)
+        Deimos.config.metrics&.increment(
+          'handler',
+          tags: %W(
+            status:error
+            topic:#{metadata[:topic]}
+          )
+        )
+        Deimos.config.logger.warn(
+          message: 'Error consuming message',
+          handler: self.class.name,
+          metadata: metadata,
+          data: payload,
+          error_message: exception.message,
+          error: exception.backtrace
+        )
+
+        _error(exception, payload, metadata)
+      end
+
+      # @param time_taken [Float]
+      # @param payload [Hash]
+      # @param metadata [Hash]
+      def _handle_success(time_taken, payload, metadata)
+        Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
+                                           time:consume
+                                           topic:#{metadata[:topic]}
+                                         ))
+        Deimos.config.metrics&.increment('handler', tags: %W(
+                                           status:success
+                                           topic:#{metadata[:topic]}
+                                         ))
+        Deimos.config.logger.info(
+          message: 'Finished processing Kafka event',
+          payload: payload,
+          time_elapsed: time_taken,
+          metadata: metadata
+        )
+      end
+    end
+  end
+end
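
For orientation, a consumer built on this module overrides consume and receives the already-decoded payload; decoding, metrics, and error handling all happen in around_consume above. A minimal sketch (class, schema, and record names are hypothetical, following the DSL documented in the README):

# Hypothetical usage sketch -- not part of this diff.
class MyConsumer < Deimos::Consumer
  schema 'MySchema'
  namespace 'com.my-namespace'
  key_config field: :test_id

  # Receives the decoded payload; raising here is routed through
  # _handle_error and reraised only if configured as fatal.
  def consume(payload, metadata)
    Widget.create!(id: metadata[:key], name: payload[:name])
  end
end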
--- a/data/lib/deimos/consumer.rb
+++ b/data/lib/deimos/consumer.rb
@@ -1,94 +1,104 @@
 # frozen_string_literal: true
 
-require 'deimos/base_consumer'
-require 'deimos/shared_config'
-require 'phobos/handler'
-require 'active_support/all'
+require 'deimos/consume/batch_consumption'
+require 'deimos/consume/message_consumption'
 
-# Class to consume messages coming from the pipeline topic
+# Class to consume messages coming from a Kafka topic
 # Note: According to the docs, instances of your handler will be created
-# for every incoming message. This class should be lightweight.
+# for every incoming message/batch. This class should be lightweight.
 module Deimos
-  # Parent consumer class.
-  class Consumer < BaseConsumer
-    include Phobos::Handler
+  # Basic consumer class. Inherit from this class and override either consume
+  # or consume_batch, depending on the delivery mode of your listener.
+  # `consume` -> use `delivery :message` or `delivery :batch`
+  # `consume_batch` -> use `delivery :inline_batch`
+  class Consumer
+    include Consume::MessageConsumption
+    include Consume::BatchConsumption
+    include SharedConfig
 
-    # :nodoc:
-    def around_consume(payload, metadata)
-      benchmark = Benchmark.measure do
-        _with_error_span(payload, metadata) do
-          new_metadata = metadata.dup
-          new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
-          decoded_payload = payload ? self.class.decoder.decode(payload) : nil
-          _received_message(payload, metadata)
-          yield decoded_payload, new_metadata
-        end
+    class << self
+      # @return [Deimos::SchemaBackends::Base]
+      def decoder
+        @decoder ||= Deimos.schema_backend(schema: config[:schema],
+                                           namespace: config[:namespace])
+      end
+
+      # @return [Deimos::SchemaBackends::Base]
+      def key_decoder
+        @key_decoder ||= Deimos.schema_backend(schema: config[:key_schema],
+                                               namespace: config[:namespace])
       end
-      _handle_success(benchmark.real, payload, metadata)
     end
 
-    # Consume incoming messages.
-    # @param _payload [String]
-    # @param _metadata [Hash]
-    def consume(_payload, _metadata)
-      raise NotImplementedError
+    # Helper method to decode an encoded key.
+    # @param key [String]
+    # @return [Object] the decoded key.
+    def decode_key(key)
+      return nil if key.nil?
+
+      config = self.class.config
+      unless config[:key_configured]
+        raise 'No key config given - if you are not decoding keys, please use '\
+              '`key_config plain: true`'
+      end
+
+      if config[:key_field]
+        self.class.decoder.decode_key(key, config[:key_field])
+      elsif config[:key_schema]
+        self.class.key_decoder.decode(key, schema: config[:key_schema])
+      else # no encoding
+        key
+      end
     end
 
     private
 
-    def _received_message(payload, metadata)
-      Deimos.config.logger.info(
-        message: 'Got Kafka event',
-        payload: payload,
-        metadata: metadata
+    def _with_span
+      @span = Deimos.config.tracer&.start(
+        'deimos-consumer',
+        resource: self.class.name.gsub('::', '-')
       )
-      Deimos.config.metrics&.increment('handler', tags: %W(
-                                         status:received
+      yield
+    ensure
+      Deimos.config.tracer&.finish(@span)
+    end
+
+    def _report_time_delayed(payload, metadata)
+      return if payload.nil? || payload['timestamp'].blank?
+
+      begin
+        time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
+      rescue ArgumentError
+        Deimos.config.logger.info(
+          message: "Error parsing timestamp! #{payload['timestamp']}"
+        )
+        return
+      end
+      Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
+                                         time:time_delayed
                                          topic:#{metadata[:topic]}
                                        ))
-      _report_time_delayed(payload, metadata)
     end
 
-    # @param exception [Throwable]
-    # @param payload [Hash]
-    # @param metadata [Hash]
-    def _handle_error(exception, payload, metadata)
-      Deimos.config.metrics&.increment(
-        'handler',
-        tags: %W(
-          status:error
-          topic:#{metadata[:topic]}
-        )
-      )
-      Deimos.config.logger.warn(
-        message: 'Error consuming message',
-        handler: self.class.name,
-        metadata: metadata,
-        data: payload,
-        error_message: exception.message,
-        error: exception.backtrace
-      )
-      super
+    # Overrideable method to determine if a given error should be considered
+    # "fatal" and always be reraised.
+    # @param _error [Exception]
+    # @param _payload [Hash]
+    # @param _metadata [Hash]
+    # @return [Boolean]
+    def fatal_error?(_error, _payload, _metadata)
+      false
     end
 
-    # @param time_taken [Float]
+    # @param exception [Exception]
     # @param payload [Hash]
     # @param metadata [Hash]
-    def _handle_success(time_taken, payload, metadata)
-      Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
-                                         time:consume
-                                         topic:#{metadata[:topic]}
-                                       ))
-      Deimos.config.metrics&.increment('handler', tags: %W(
-                                         status:success
-                                         topic:#{metadata[:topic]}
-                                       ))
-      Deimos.config.logger.info(
-        message: 'Finished processing Kafka event',
-        payload: payload,
-        time_elapsed: time_taken,
-        metadata: metadata
-      )
+    def _error(exception, payload, metadata)
+      Deimos.config.tracer&.set_error(@span, exception)
+
+      raise if Deimos.config.consumers.reraise_errors ||
+               Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) ||
+               fatal_error?(exception, payload, metadata)
    end
  end
end
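
The new _error method makes the reraise decision explicit: a consumer error propagates if the global consumers.reraise_errors setting is on, if the configured consumers.fatal_error proc returns true, or if the consumer's own fatal_error? override returns true. A hedged sketch of the per-consumer hook (the error class is chosen for illustration only):

# Hypothetical usage sketch -- not part of this diff.
class MyConsumer < Deimos::Consumer
  # Treat connection loss as fatal so the message is retried rather
  # than logged and skipped; everything else keeps the default (false).
  def fatal_error?(error, _payload, _metadata)
    error.is_a?(ActiveRecord::ConnectionNotEstablished)
  end
end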
--- a/data/lib/deimos/kafka_message.rb
+++ b/data/lib/deimos/kafka_message.rb
@@ -42,7 +42,7 @@ module Deimos
       messages.map do |m|
         {
           key: m.key.present? ? decoder&.decode_key(m.key) || m.key : nil,
-          payload: decoder&.decoder&.decode(self.message) || self.message
+          payload: decoder&.decoder&.decode(m.message) || m.message
         }
       end
     end
--- a/data/lib/deimos/kafka_topic_info.rb
+++ b/data/lib/deimos/kafka_topic_info.rb
@@ -14,7 +14,7 @@ module Deimos
       # Try to create it - it's fine if it already exists
       begin
         self.create(topic: topic)
-      rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/SuppressedException
+      rescue ActiveRecord::RecordNotUnique
         # continue on
       end
 
--- a/data/lib/deimos/message.rb
+++ b/data/lib/deimos/message.rb
@@ -10,7 +10,7 @@ module Deimos
     # @param producer [Class]
     def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
       @payload = payload&.with_indifferent_access
-      @producer_name = producer.name
+      @producer_name = producer&.name
       @topic = topic
       @key = key
       @partition_key = partition_key
@@ -70,5 +70,10 @@ module Deimos
     def ==(other)
       self.to_h == other.to_h
     end
+
+    # @return [Boolean] True if this message is a tombstone
+    def tombstone?
+      payload.nil?
+    end
   end
 end
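
The new tombstone? predicate gives callers a standard way to detect deletion records, i.e. a keyed message with a nil payload. A short example using the constructor signature shown above (producer and key are hypothetical):

# Hypothetical usage sketch -- not part of this diff.
message = Deimos::Message.new(nil, MyProducer, topic: 'my-topic', key: '123')
message.tombstone? # => true: a nil payload signals a deletion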
--- a/data/lib/deimos/metrics/provider.rb
+++ b/data/lib/deimos/metrics/provider.rb
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-# rubocop:disable Lint/UnusedMethodArgument
 module Deimos
   module Metrics
     # Base class for all metrics providers.
@@ -35,4 +34,3 @@ module Deimos
     end
   end
 end
-# rubocop:enable Lint/UnusedMethodArgument
--- /dev/null
+++ b/data/lib/deimos/poll_info.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+module Deimos
+  # ActiveRecord class to record the last time we polled the database.
+  # For use with DbPoller.
+  class PollInfo < ActiveRecord::Base
+    self.table_name = 'deimos_poll_info'
+  end
+end
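
PollInfo assumes a deimos_poll_info table, created by the generator templates added in this release (files 35-37 in the list above). A rough sketch of the shape implied by the columns DbPoller reads and writes (producer, last_sent, last_sent_id); the migration class name and Rails version here are assumptions, and the authoritative version is the bundled generator template:

# Hypothetical sketch; see lib/generators/deimos/db_poller/templates/migration.
class CreateDbPoller < ActiveRecord::Migration[6.0]
  def change
    create_table :deimos_poll_info do |t|
      t.string :producer, null: false # producer class name
      t.datetime :last_sent           # timestamp of the last record sent
      t.bigint :last_sent_id          # primary key of the last record sent
    end
  end
end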
--- a/data/lib/deimos/tracing/provider.rb
+++ b/data/lib/deimos/tracing/provider.rb
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-# rubocop:disable Lint/UnusedMethodArgument
 module Deimos
   module Tracing
     # Base class for all tracing providers.
@@ -28,4 +27,3 @@ module Deimos
     end
   end
 end
-# rubocop:enable Lint/UnusedMethodArgument
--- /dev/null
+++ b/data/lib/deimos/utils/db_poller.rb
@@ -0,0 +1,149 @@
+# frozen_string_literal: true
+
+require 'deimos/poll_info'
+require 'sigurd/executor'
+require 'sigurd/signal_handler'
+
+module Deimos
+  module Utils
+    # Class which continually polls the database and sends Kafka messages.
+    class DbPoller
+      BATCH_SIZE = 1000
+
+      # Needed for Executor so it can identify the worker
+      attr_reader :id
+
+      # Begin the DB Poller process.
+      def self.start!
+        if Deimos.config.db_poller_objects.empty?
+          raise('No pollers configured!')
+        end
+
+        pollers = Deimos.config.db_poller_objects.map do |poller_config|
+          self.new(poller_config)
+        end
+        executor = Sigurd::Executor.new(pollers,
+                                        sleep_seconds: 5,
+                                        logger: Deimos.config.logger)
+        signal_handler = Sigurd::SignalHandler.new(executor)
+        signal_handler.run!
+      end
+
+      # @param config [Deimos::Configuration::ConfigStruct]
+      def initialize(config)
+        @config = config
+        @id = SecureRandom.hex
+        begin
+          @producer = @config.producer_class.constantize
+        rescue NameError
+          raise "Class #{@config.producer_class} not found!"
+        end
+        unless @producer < Deimos::ActiveRecordProducer
+          raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
+        end
+      end
+
+      # Start the poll:
+      # 1) Grab the current PollInfo from the database indicating the last
+      # time we ran
+      # 2) On a loop, process all the recent updates between the last time
+      # we ran and now.
+      def start
+        # Don't send asynchronously
+        if Deimos.config.producers.backend == :kafka_async
+          Deimos.config.producers.backend = :kafka
+        end
+        Deimos.config.logger.info('Starting...')
+        @signal_to_stop = false
+        retrieve_poll_info
+        loop do
+          if @signal_to_stop
+            Deimos.config.logger.info('Shutting down')
+            break
+          end
+          process_updates
+          sleep 0.1
+        end
+      end
+
+      # Grab the PollInfo or create if it doesn't exist.
+      def retrieve_poll_info
+        ActiveRecord::Base.connection.reconnect!
+        new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
+        @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
+                Deimos::PollInfo.create!(producer: @config.producer_class,
+                                         last_sent: new_time,
+                                         last_sent_id: 0)
+      end
+
+      # Stop the poll.
+      def stop
+        Deimos.config.logger.info('Received signal to stop')
+        @signal_to_stop = true
+      end
+
+      # Indicate whether this current loop should process updates. Most loops
+      # will busy-wait (sleeping 0.1 seconds) until it's ready.
+      # @return [Boolean]
+      def should_run?
+        Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
+      end
+
+      # @param record [ActiveRecord::Base]
+      # @return [ActiveSupport::TimeWithZone]
+      def last_updated(record)
+        record.public_send(@config.timestamp_column)
+      end
+
+      # Send messages for updated data.
+      def process_updates
+        return unless should_run?
+
+        time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
+        time_to = Time.zone.now - @config.delay_time
+        Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
+        message_count = 0
+        batch_count = 0
+
+        # poll_query gets all the relevant data from the database, as defined
+        # by the producer itself.
+        loop do
+          Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
+          batch = fetch_results(time_from, time_to).to_a
+          break if batch.empty?
+
+          batch_count += 1
+          process_batch(batch)
+          message_count += batch.size
+          time_from = last_updated(batch.last)
+        end
+        Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches}")
+      end
+
+      # @param time_from [ActiveSupport::TimeWithZone]
+      # @param time_to [ActiveSupport::TimeWithZone]
+      # @return [ActiveRecord::Relation]
+      def fetch_results(time_from, time_to)
+        id = @producer.config[:record_class].primary_key
+        quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
+        quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
+        @producer.poll_query(time_from: time_from,
+                             time_to: time_to,
+                             column_name: @config.timestamp_column,
+                             min_id: @info.last_sent_id).
+          limit(BATCH_SIZE).
+          order("#{quoted_timestamp}, #{quoted_id}")
+      end
+
+      # @param batch [Array<ActiveRecord::Base>]
+      def process_batch(batch)
+        record = batch.last
+        id_method = record.class.primary_key
+        last_id = record.public_send(id_method)
+        last_updated_at = last_updated(record)
+        @producer.send_events(batch)
+        @info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
+      end
+    end
+  end
+end
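
Putting the pieces together: a poller needs an ActiveRecordProducer subclass (which supplies the poll_query and send_events calls used above) plus a db_poller entry in the configuration, and then runs via the rake task added in lib/tasks/deimos.rake. The sketch below is hedged: the class names, the exact db_poller setting names, and the rake task name are assumptions based on this diff and the updated CONFIGURATION.md, not verified API:

# Hypothetical usage sketch -- not part of this diff.
class WidgetProducer < Deimos::ActiveRecordProducer
  topic 'widgets'
  schema 'Widget'
  namespace 'com.my-namespace'
  key_config field: :id
  record_class Widget
end

Deimos.configure do
  db_poller do
    producer_class 'WidgetProducer'
    run_every 60   # seconds between polls, compared in should_run?
    delay_time 2   # trail real time to avoid racing uncommitted rows
  end
end

# Then run the poller process (task name assumed):
#   bundle exec rake deimos:db_poller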