deimos-ruby 1.6.1 → 1.8.0.pre.beta1

Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +30 -0
  6. data/Gemfile.lock +87 -80
  7. data/README.md +139 -15
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +3 -2
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +148 -0
  22. data/lib/deimos/consume/message_consumption.rb +93 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_source.rb +29 -23
  26. data/lib/deimos/kafka_topic_info.rb +1 -1
  27. data/lib/deimos/message.rb +6 -1
  28. data/lib/deimos/metrics/provider.rb +0 -2
  29. data/lib/deimos/poll_info.rb +9 -0
  30. data/lib/deimos/tracing/provider.rb +0 -2
  31. data/lib/deimos/utils/db_poller.rb +149 -0
  32. data/lib/deimos/utils/db_producer.rb +8 -3
  33. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  34. data/lib/deimos/utils/lag_reporter.rb +19 -26
  35. data/lib/deimos/version.rb +1 -1
  36. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  37. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  38. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  39. data/lib/tasks/deimos.rake +7 -0
  40. data/spec/active_record_batch_consumer_spec.rb +481 -0
  41. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  42. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  43. data/spec/active_record_consumer_spec.rb +22 -11
  44. data/spec/active_record_producer_spec.rb +66 -88
  45. data/spec/batch_consumer_spec.rb +23 -7
  46. data/spec/config/configuration_spec.rb +4 -0
  47. data/spec/consumer_spec.rb +8 -8
  48. data/spec/deimos_spec.rb +57 -49
  49. data/spec/handlers/my_batch_consumer.rb +6 -1
  50. data/spec/handlers/my_consumer.rb +6 -1
  51. data/spec/kafka_source_spec.rb +53 -0
  52. data/spec/message_spec.rb +19 -0
  53. data/spec/producer_spec.rb +3 -3
  54. data/spec/rake_spec.rb +1 -1
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/spec_helper.rb +61 -6
  58. data/spec/utils/db_poller_spec.rb +320 -0
  59. data/spec/utils/deadlock_retry_spec.rb +74 -0
  60. data/spec/utils/lag_reporter_spec.rb +29 -22
  61. metadata +61 -20
  62. data/lib/deimos/base_consumer.rb +0 -104
  63. data/lib/deimos/utils/executor.rb +0 -124
  64. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  65. data/lib/deimos/utils/signal_handler.rb +0 -68
  66. data/spec/utils/executor_spec.rb +0 -53
  67. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -42,7 +42,7 @@ module Deimos
  messages.map do |m|
  {
  key: m.key.present? ? decoder&.decode_key(m.key) || m.key : nil,
- payload: decoder&.decoder&.decode(self.message) || self.message
+ payload: decoder&.decoder&.decode(m.message) || m.message
  }
  end
  end
@@ -93,31 +93,37 @@ module Deimos

  # This will contain an array of hashes, where each hash is the actual
  # attribute hash that created the object.
- ids = if results.is_a?(Array)
- results[1]
- elsif results.respond_to?(:ids)
- results.ids
- else
- []
- end
- if ids.blank?
- # re-fill IDs based on what was just entered into the DB.
- if self.connection.adapter_name.downcase =~ /sqlite/
- last_id = self.connection.select_value('select last_insert_rowid()')
- ids = ((last_id - array_of_attributes.size + 1)..last_id).to_a
- else # mysql
- last_id = self.connection.select_value('select LAST_INSERT_ID()')
- ids = (last_id..(last_id + array_of_attributes.size)).to_a
- end
- end
  array_of_hashes = []
- array_of_attributes.each_with_index do |array, i|
- hash = column_names.zip(array).to_h.with_indifferent_access
- hash[self.primary_key] = ids[i] if hash[self.primary_key].blank?
- array_of_hashes << hash
+ array_of_attributes.each do |array|
+ array_of_hashes << column_names.zip(array).to_h.with_indifferent_access
+ end
+ hashes_with_id, hashes_without_id = array_of_hashes.partition { |arr| arr[:id].present? }
+
+ self.kafka_producers.each { |p| p.send_events(hashes_with_id) }
+
+ if hashes_without_id.any?
+ if options[:on_duplicate_key_update].present? &&
+ options[:on_duplicate_key_update] != [:updated_at]
+ unique_columns = column_names.map(&:to_s) -
+ options[:on_duplicate_key_update].map(&:to_s) - %w(id created_at)
+ records = hashes_without_id.map do |hash|
+ self.where(unique_columns.map { |c| [c, hash[c]] }.to_h).first
+ end
+ self.kafka_producers.each { |p| p.send_events(records) }
+ else
+ # re-fill IDs based on what was just entered into the DB.
+ last_id = if self.connection.adapter_name.downcase =~ /sqlite/
+ self.connection.select_value('select last_insert_rowid()') -
+ hashes_without_id.size + 1
+ else # mysql
+ self.connection.select_value('select LAST_INSERT_ID()')
+ end
+ hashes_without_id.each_with_index do |attrs, i|
+ attrs[:id] = last_id + i
+ end
+ self.kafka_producers.each { |p| p.send_events(hashes_without_id) }
+ end
  end
-
- self.kafka_producers.each { |p| p.send_events(array_of_hashes) }
  results
  end
  end
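
The hunk above changes how kafka_source back-fills primary keys after a bulk import: rows that already carry an id are produced as-is, while rows without one are either re-read from the database by their unique columns (when on_duplicate_key_update is used with more than updated_at) or assigned ids counted forward from the connection's last insert id. A rough illustration of the unique-column calculation, using hypothetical column names rather than anything from this diff:

    # Illustration only -- hypothetical columns, mirroring the subtraction above.
    column_names = %w(id test_id some_int updated_at created_at)
    on_duplicate_key_update = %i(some_int updated_at)

    unique_columns = column_names.map(&:to_s) -
                     on_duplicate_key_update.map(&:to_s) - %w(id created_at)
    # => ["test_id"] -- these values identify each freshly imported row, so it
    # can be re-fetched (now with its real id) before the Kafka events are sent.
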
@@ -14,7 +14,7 @@ module Deimos
  # Try to create it - it's fine if it already exists
  begin
  self.create(topic: topic)
- rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/SuppressedException
+ rescue ActiveRecord::RecordNotUnique
  # continue on
  end

@@ -10,7 +10,7 @@ module Deimos
  # @param producer [Class]
  def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
  @payload = payload&.with_indifferent_access
- @producer_name = producer.name
+ @producer_name = producer&.name
  @topic = topic
  @key = key
  @partition_key = partition_key
@@ -70,5 +70,10 @@ module Deimos
  def ==(other)
  self.to_h == other.to_h
  end
+
+ # @return [Boolean] True if this message is a tombstone
+ def tombstone?
+ payload.nil?
+ end
  end
  end
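
tombstone? simply reports whether the message carries a nil payload, which is how deletions are signalled on log-compacted topics. A minimal sketch of checking it; MyProducer and the topic are hypothetical stand-ins:

    # Assumes a producer class named MyProducer exists.
    message = Deimos::Message.new(nil, MyProducer, topic: 'my-topic', key: '123')
    message.tombstone?  # => true -- a nil payload marks the key for deletion

    message = Deimos::Message.new({ 'id' => 123 }, MyProducer, topic: 'my-topic', key: '123')
    message.tombstone?  # => false
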
@@ -1,6 +1,5 @@
  # frozen_string_literal: true

- # rubocop:disable Lint/UnusedMethodArgument
  module Deimos
  module Metrics
  # Base class for all metrics providers.
@@ -35,4 +34,3 @@ module Deimos
  end
  end
  end
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,9 @@
+ # frozen_string_literal: true
+
+ module Deimos
+ # ActiveRecord class to record the last time we polled the database.
+ # For use with DbPoller.
+ class PollInfo < ActiveRecord::Base
+ self.table_name = 'deimos_poll_info'
+ end
+ end
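
PollInfo is the bookkeeping record that the new DbPoller (further down) reads and writes; the poller relies on producer, last_sent and last_sent_id columns. A sketch of what the backing table might look like -- column types are assumptions inferred from that usage, not copied from the bundled generator templates:

    # Assumed shape of the deimos_poll_info table.
    class CreateDbPoller < ActiveRecord::Migration[5.2]
      def change
        create_table :deimos_poll_info do |t|
          t.string :producer, null: false  # producer class being polled
          t.datetime :last_sent            # timestamp of the last row sent
          t.bigint :last_sent_id           # primary key of the last row sent
        end
      end
    end
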
@@ -1,6 +1,5 @@
  # frozen_string_literal: true

- # rubocop:disable Lint/UnusedMethodArgument
  module Deimos
  module Tracing
  # Base class for all tracing providers.
@@ -28,4 +27,3 @@ module Deimos
  end
  end
  end
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,149 @@
+ # frozen_string_literal: true
+
+ require 'deimos/poll_info'
+ require 'sigurd/executor'
+ require 'sigurd/signal_handler'
+
+ module Deimos
+ module Utils
+ # Class which continually polls the database and sends Kafka messages.
+ class DbPoller
+ BATCH_SIZE = 1000
+
+ # Needed for Executor so it can identify the worker
+ attr_reader :id
+
+ # Begin the DB Poller process.
+ def self.start!
+ if Deimos.config.db_poller_objects.empty?
+ raise('No pollers configured!')
+ end
+
+ pollers = Deimos.config.db_poller_objects.map do |poller_config|
+ self.new(poller_config)
+ end
+ executor = Sigurd::Executor.new(pollers,
+ sleep_seconds: 5,
+ logger: Deimos.config.logger)
+ signal_handler = Sigurd::SignalHandler.new(executor)
+ signal_handler.run!
+ end
+
+ # @param config [Deimos::Configuration::ConfigStruct]
+ def initialize(config)
+ @config = config
+ @id = SecureRandom.hex
+ begin
+ @producer = @config.producer_class.constantize
+ rescue NameError
+ raise "Class #{@config.producer_class} not found!"
+ end
+ unless @producer < Deimos::ActiveRecordProducer
+ raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
+ end
+ end
+
+ # Start the poll:
+ # 1) Grab the current PollInfo from the database indicating the last
+ # time we ran
+ # 2) On a loop, process all the recent updates between the last time
+ # we ran and now.
+ def start
+ # Don't send asynchronously
+ if Deimos.config.producers.backend == :kafka_async
+ Deimos.config.producers.backend = :kafka
+ end
+ Deimos.config.logger.info('Starting...')
+ @signal_to_stop = false
+ retrieve_poll_info
+ loop do
+ if @signal_to_stop
+ Deimos.config.logger.info('Shutting down')
+ break
+ end
+ process_updates
+ sleep 0.1
+ end
+ end
+
+ # Grab the PollInfo or create if it doesn't exist.
+ def retrieve_poll_info
+ ActiveRecord::Base.connection.reconnect!
+ new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
+ @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
+ Deimos::PollInfo.create!(producer: @config.producer_class,
+ last_sent: new_time,
+ last_sent_id: 0)
+ end
+
+ # Stop the poll.
+ def stop
+ Deimos.config.logger.info('Received signal to stop')
+ @signal_to_stop = true
+ end
+
+ # Indicate whether this current loop should process updates. Most loops
+ # will busy-wait (sleeping 0.1 seconds) until it's ready.
+ # @return [Boolean]
+ def should_run?
+ Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
+ end
+
+ # @param record [ActiveRecord::Base]
+ # @return [ActiveSupport::TimeWithZone]
+ def last_updated(record)
+ record.public_send(@config.timestamp_column)
+ end
+
+ # Send messages for updated data.
+ def process_updates
+ return unless should_run?
+
+ time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
+ time_to = Time.zone.now - @config.delay_time
+ Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
+ message_count = 0
+ batch_count = 0
+
+ # poll_query gets all the relevant data from the database, as defined
+ # by the producer itself.
+ loop do
+ Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
+ batch = fetch_results(time_from, time_to).to_a
+ break if batch.empty?
+
+ batch_count += 1
+ process_batch(batch)
+ message_count += batch.size
+ time_from = last_updated(batch.last)
+ end
+ Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
+ end
+
+ # @param time_from [ActiveSupport::TimeWithZone]
+ # @param time_to [ActiveSupport::TimeWithZone]
+ # @return [ActiveRecord::Relation]
+ def fetch_results(time_from, time_to)
+ id = @producer.config[:record_class].primary_key
+ quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
+ quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
+ @producer.poll_query(time_from: time_from,
+ time_to: time_to,
+ column_name: @config.timestamp_column,
+ min_id: @info.last_sent_id).
+ limit(BATCH_SIZE).
+ order("#{quoted_timestamp}, #{quoted_id}")
+ end
+
+ # @param batch [Array<ActiveRecord::Base>]
+ def process_batch(batch)
+ record = batch.last
+ id_method = record.class.primary_key
+ last_id = record.public_send(id_method)
+ last_updated_at = last_updated(record)
+ @producer.send_events(batch)
+ @info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
+ end
+ end
+ end
+ end
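
Each DbPoller instance is driven by one entry in Deimos.config.db_poller_objects, and the attributes it reads are producer_class, run_every, delay_time, timestamp_column, start_from_beginning and full_table. Assuming the new configuration DSL (added in config/configuration.rb, not shown in this excerpt) exposes a db_poller block for those settings, wiring one up might look roughly like this:

    # Sketch only -- the db_poller block and values here are assumptions based
    # on the attributes DbPoller reads above, not a copy of the documented DSL.
    Deimos.configure do
      db_poller do
        producer_class 'MyRecordProducer'  # must inherit from Deimos::ActiveRecordProducer
        run_every 60                       # seconds between polls
        delay_time 2                       # stay this far behind "now" to avoid in-flight writes
        timestamp_column :updated_at       # column used to find changed rows
        start_from_beginning true          # first run sends the whole table
      end
    end

    # The pollers are then started in a long-running process:
    Deimos::Utils::DbPoller.start!
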
@@ -2,7 +2,8 @@

  module Deimos
  module Utils
- # Class which continually polls the database and sends Kafka messages.
+ # Class which continually polls the kafka_messages table
+ # in the database and sends Kafka messages.
  class DbProducer
  include Phobos::Producer
  attr_accessor :id, :current_topic
@@ -86,9 +87,9 @@ module Deimos
  begin
  produce_messages(compacted_messages.map(&:phobos_message))
  rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
- Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
  @logger.error('Message batch too large, deleting...')
  @logger.error(Deimos::KafkaMessage.decoded(messages))
+ Deimos::KafkaMessage.where(id: messages.map(&:id)).delete_all
  raise
  end
  end
@@ -132,7 +133,11 @@ module Deimos
  metrics.gauge('pending_db_messages_max_wait', 0)
  end
  messages.each do |record|
- time_diff = Time.zone.now - record.earliest
+ earliest = record.earliest
+ # SQLite gives a string here
+ earliest = Time.zone.parse(earliest) if earliest.is_a?(String)
+
+ time_diff = Time.zone.now - earliest
  metrics.gauge('pending_db_messages_max_wait', time_diff,
  tags: ["topic:#{record.topic}"])
  end
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ module Deimos
+ module Utils
+ # Utility class to retry a given block if a deadlock is encountered.
+ # Supports Postgres and MySQL deadlocks and lock wait timeouts.
+ class DeadlockRetry
+ class << self
+ # Maximum number of times to retry the block after encountering a deadlock
+ RETRY_COUNT = 2
+
+ # Need to match on error messages to support older Rails versions
+ DEADLOCK_MESSAGES = [
+ # MySQL
+ 'Deadlock found when trying to get lock',
+ 'Lock wait timeout exceeded',
+
+ # Postgres
+ 'deadlock detected'
+ ].freeze
+
+ # Retry the given block when encountering a deadlock. For any other
+ # exceptions, they are reraised. This is used to handle cases where
+ # the database may be busy but the transaction would succeed if
+ # retried later. Note that your block should be idempotent and it will
+ # be wrapped in a transaction.
+ # Sleeps for a random number of seconds to prevent multiple transactions
+ # from retrying at the same time.
+ # @param tags [Array] Tags to attach when logging and reporting metrics.
+ # @yield Yields to the block that may deadlock.
+ def wrap(tags=[])
+ count = RETRY_COUNT
+
+ begin
+ ActiveRecord::Base.transaction do
+ yield
+ end
+ rescue ActiveRecord::StatementInvalid => e
+ # Reraise if not a known deadlock
+ raise if DEADLOCK_MESSAGES.none? { |m| e.message.include?(m) }
+
+ # Reraise if all retries exhausted
+ raise if count <= 0
+
+ Deimos.config.logger.warn(
+ message: 'Deadlock encountered when trying to execute query. '\
+ "Retrying. #{count} attempt(s) remaining",
+ tags: tags
+ )
+
+ Deimos.config.metrics&.increment(
+ 'deadlock',
+ tags: tags
+ )
+
+ count -= 1
+
+ # Sleep for a random amount so that if there are multiple
+ # transactions deadlocking, they don't all retry at the same time
+ sleep(Random.rand(5.0) + 0.5)
+
+ retry
+ end
+ end
+ end
+ end
+ end
+ end
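
DeadlockRetry.wrap opens a transaction around the block and retries it up to RETRY_COUNT more times when the error message matches one of the known MySQL/Postgres deadlock strings, sleeping a random 0.5-5.5 seconds between attempts. Usage is just a matter of wrapping the contentious write; the model and tags here are hypothetical:

    # The tags feed the 'deadlock' metric and the retry warning log.
    Deimos::Utils::DeadlockRetry.wrap(%w(topic:my-topic)) do
      Widget.where(test_id: 'abc').update_all(some_int: 5)
    end
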
@@ -29,30 +29,21 @@ module Deimos
  self.topics[topic.to_s].report_lag(partition)
  end

- # @param topic [String]
- # @param partition [Integer]
- # @param lag [Integer]
- def assign_lag(topic, partition, lag)
- self.topics[topic.to_s] ||= Topic.new(topic, self)
- self.topics[topic.to_s].assign_lag(partition, lag)
- end
-
- # Figure out the current lag by asking Kafka based on the current offset.
  # @param topic [String]
  # @param partition [Integer]
  # @param offset [Integer]
- def compute_lag(topic, partition, offset)
+ def assign_current_offset(topic, partition, offset)
  self.topics[topic.to_s] ||= Topic.new(topic, self)
- self.topics[topic.to_s].compute_lag(partition, offset)
+ self.topics[topic.to_s].assign_current_offset(partition, offset)
  end
  end

- # Topic which has a hash of partition => last known offset lag
+ # Topic which has a hash of partition => last known current offsets
  class Topic
  # @return [String]
  attr_accessor :topic_name
  # @return [Hash<Integer, Integer>]
- attr_accessor :partition_offset_lags
+ attr_accessor :partition_current_offsets
  # @return [ConsumerGroup]
  attr_accessor :consumer_group

@@ -61,35 +52,33 @@ module Deimos
  def initialize(topic_name, group)
  self.topic_name = topic_name
  self.consumer_group = group
- self.partition_offset_lags = {}
+ self.partition_current_offsets = {}
  end

  # @param partition [Integer]
- # @param lag [Integer]
- def assign_lag(partition, lag)
- self.partition_offset_lags[partition.to_i] = lag
+ def assign_current_offset(partition, offset)
+ self.partition_current_offsets[partition.to_i] = offset
  end

  # @param partition [Integer]
- # @param offset [Integer]
  def compute_lag(partition, offset)
- return if self.partition_offset_lags[partition.to_i]
-
  begin
  client = Phobos.create_kafka_client
  last_offset = client.last_offset_for(self.topic_name, partition)
- assign_lag(partition, [last_offset - offset, 0].max)
+ lag = last_offset - offset
  rescue StandardError # don't do anything, just wait
  Deimos.config.logger.
  debug("Error computing lag for #{self.topic_name}, will retry")
  end
+ lag || 0
  end

  # @param partition [Integer]
  def report_lag(partition)
- lag = self.partition_offset_lags[partition.to_i]
- return unless lag
+ current_offset = self.partition_current_offsets[partition.to_i]
+ return unless current_offset

+ lag = compute_lag(partition, current_offset)
  group = self.consumer_group.id
  Deimos.config.logger.
  debug("Sending lag: #{group}/#{partition}: #{lag}")
@@ -109,16 +98,20 @@ module Deimos
  @groups = {}
  end

+ # offset_lag = event.payload.fetch(:offset_lag)
+ # group_id = event.payload.fetch(:group_id)
+ # topic = event.payload.fetch(:topic)
+ # partition = event.payload.fetch(:partition)
  # @param payload [Hash]
  def message_processed(payload)
- lag = payload[:offset_lag]
+ offset = payload[:offset] || payload[:last_offset]
  topic = payload[:topic]
  group = payload[:group_id]
  partition = payload[:partition]

  synchronize do
  @groups[group.to_s] ||= ConsumerGroup.new(group)
- @groups[group.to_s].assign_lag(topic, partition, lag)
+ @groups[group.to_s].assign_current_offset(topic, partition, offset)
  end
  end
  end
@@ -131,7 +124,7 @@

  synchronize do
  @groups[group.to_s] ||= ConsumerGroup.new(group)
- @groups[group.to_s].compute_lag(topic, partition, offset)
+ @groups[group.to_s].assign_current_offset(topic, partition, offset)
  end
  end