deimos-ruby 1.6.1 → 1.8.0.pre.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +30 -0
  6. data/Gemfile.lock +87 -80
  7. data/README.md +139 -15
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +3 -2
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +148 -0
  22. data/lib/deimos/consume/message_consumption.rb +93 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_source.rb +29 -23
  26. data/lib/deimos/kafka_topic_info.rb +1 -1
  27. data/lib/deimos/message.rb +6 -1
  28. data/lib/deimos/metrics/provider.rb +0 -2
  29. data/lib/deimos/poll_info.rb +9 -0
  30. data/lib/deimos/tracing/provider.rb +0 -2
  31. data/lib/deimos/utils/db_poller.rb +149 -0
  32. data/lib/deimos/utils/db_producer.rb +8 -3
  33. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  34. data/lib/deimos/utils/lag_reporter.rb +19 -26
  35. data/lib/deimos/version.rb +1 -1
  36. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  37. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  38. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  39. data/lib/tasks/deimos.rake +7 -0
  40. data/spec/active_record_batch_consumer_spec.rb +481 -0
  41. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  42. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  43. data/spec/active_record_consumer_spec.rb +22 -11
  44. data/spec/active_record_producer_spec.rb +66 -88
  45. data/spec/batch_consumer_spec.rb +23 -7
  46. data/spec/config/configuration_spec.rb +4 -0
  47. data/spec/consumer_spec.rb +8 -8
  48. data/spec/deimos_spec.rb +57 -49
  49. data/spec/handlers/my_batch_consumer.rb +6 -1
  50. data/spec/handlers/my_consumer.rb +6 -1
  51. data/spec/kafka_source_spec.rb +53 -0
  52. data/spec/message_spec.rb +19 -0
  53. data/spec/producer_spec.rb +3 -3
  54. data/spec/rake_spec.rb +1 -1
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/spec_helper.rb +61 -6
  58. data/spec/utils/db_poller_spec.rb +320 -0
  59. data/spec/utils/deadlock_retry_spec.rb +74 -0
  60. data/spec/utils/lag_reporter_spec.rb +29 -22
  61. metadata +61 -20
  62. data/lib/deimos/base_consumer.rb +0 -104
  63. data/lib/deimos/utils/executor.rb +0 -124
  64. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  65. data/lib/deimos/utils/signal_handler.rb +0 -68
  66. data/spec/utils/executor_spec.rb +0 -53
  67. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,159 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/active_record_consume/batch_slicer'
4
+ require 'deimos/utils/deadlock_retry'
5
+ require 'deimos/message'
6
+
7
+ module Deimos
8
+ module ActiveRecordConsume
9
+ # Methods for consuming batches of messages and saving them to the database
10
+ # in bulk ActiveRecord operations.
11
+ module BatchConsumption
12
+ # Handle a batch of Kafka messages. Batches are split into "slices",
13
+ # which are groups of independent messages that can be processed together
14
+ # in a single database operation.
15
+ # If two messages in a batch have the same key, we cannot process them
16
+ # in the same operation as they would interfere with each other. Thus
17
+ # they are split into separate slices, unless the batch is compacted first.
18
+ # @param payloads [Array<Hash>] Decoded payloads.
19
+ # @param metadata [Hash] Information about batch, including keys.
20
+ def consume_batch(payloads, metadata)
21
+ messages = payloads.
22
+ zip(metadata[:keys]).
23
+ map { |p, k| Deimos::Message.new(p, nil, key: k) }
24
+
25
+ tags = %W(topic:#{metadata[:topic]})
26
+
27
+ Deimos.instrument('ar_consumer.consume_batch', tags) do
28
+ # The entire batch should be treated as one transaction so that if
29
+ # any message fails, the whole thing is rolled back or retried
30
+ # if there is a deadlock.
31
+ Deimos::Utils::DeadlockRetry.wrap(tags) do
32
+ if @compacted || self.class.config[:no_keys]
33
+ update_database(compact_messages(messages))
34
+ else
35
+ uncompacted_update(messages)
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ # Get unique key for the ActiveRecord instance from the incoming key.
42
+ # Override this method (with super) to customize the set of attributes that
43
+ # uniquely identifies each record in the database.
44
+ # @param key [String] The encoded key.
45
+ # @return [Hash] The key attributes.
46
+ def record_key(key)
47
+ decoded_key = decode_key(key)
48
+
49
+ if decoded_key.nil?
50
+ {}
51
+ elsif decoded_key.is_a?(Hash)
52
+ @key_converter.convert(decoded_key)
53
+ else
54
+ { @klass.primary_key => decoded_key }
55
+ end
56
+ end
57
+
58
+ protected
59
+
60
+ # Perform database operations for a batch of messages without compaction.
61
+ # All messages are split into slices containing only unique keys, and
62
+ # each slice is handled as its own batch.
63
+ # @param messages [Array<Message>] List of messages.
64
+ def uncompacted_update(messages)
65
+ BatchSlicer.
66
+ slice(messages).
67
+ each(&method(:update_database))
68
+ end
69
+
70
+ # Perform database operations for a group of messages.
71
+ # All messages with payloads are passed to upsert_records.
72
+ # All tombstone messages are passed to remove_records.
73
+ # @param messages [Array<Message>] List of messages.
74
+ def update_database(messages)
75
+ # Find all upserted records (i.e. that have a payload) and all
76
+ # deleted records (no payload)
77
+ removed, upserted = messages.partition(&:tombstone?)
78
+
79
+ upsert_records(upserted) if upserted.any?
80
+ remove_records(removed) if removed.any?
81
+ end
82
+
83
+ # Upsert any non-deleted records
84
+ # @param messages [Array<Message>] List of messages for a group of
85
+ # records to either be updated or inserted.
86
+ def upsert_records(messages)
87
+ key_cols = key_columns(messages)
88
+
89
+ # Create payloads with payload + key attributes
90
+ upserts = messages.map do |m|
91
+ record_attributes(m.payload, m.key)&.
92
+ merge(record_key(m.key))
93
+ end
94
+
95
+ # If overridden record_attributes indicated no record, skip
96
+ upserts.compact!
97
+
98
+ options = if key_cols.empty?
99
+ {} # Can't upsert with no key, just do regular insert
100
+ else
101
+ {
102
+ on_duplicate_key_update: {
103
+ # conflict_target must explicitly list the columns for
104
+ # Postgres and SQLite. Not required for MySQL, but this
105
+ # ensures consistent behaviour.
106
+ conflict_target: key_cols,
107
+ columns: :all
108
+ }
109
+ }
110
+ end
111
+
112
+ @klass.import!(upserts, options)
113
+ end
114
+
115
+ # Delete any records with a tombstone.
116
+ # @param messages [Array<Message>] List of messages for a group of
117
+ # deleted records.
118
+ def remove_records(messages)
119
+ clause = deleted_query(messages)
120
+
121
+ clause.delete_all
122
+ end
123
+
124
+ # Create an ActiveRecord relation that matches all of the passed
125
+ # records. Used for bulk deletion.
126
+ # @param records [Array<Message>] List of messages.
127
+ # @return [ActiveRecord::Relation] Matching relation.
128
+ def deleted_query(records)
129
+ keys = records.
130
+ map { |m| record_key(m.key)[@klass.primary_key] }.
131
+ reject(&:nil?)
132
+
133
+ @klass.unscoped.where(@klass.primary_key => keys)
134
+ end
135
+
136
+ # Get the set of attribute names that uniquely identify messages in the
137
+ # batch. Requires at least one record.
138
+ # @param records [Array<Message>] Non-empty list of messages.
139
+ # @return [Array<String>] List of attribute names.
140
+ # @raise [RuntimeError] If records is empty.
141
+ def key_columns(records)
142
+ raise 'Cannot determine key from empty batch' if records.empty?
143
+
144
+ first_key = records.first.key
145
+ record_key(first_key).keys
146
+ end
147
+
148
+ # Compact a batch of messages, taking only the last message for each
149
+ # unique key.
150
+ # @param batch [Array<Message>] Batch of messages.
151
+ # @return [Array<Message>] Compacted batch.
152
+ def compact_messages(batch)
153
+ return batch unless batch.first&.key.present?
154
+
155
+ batch.reverse.uniq(&:key).reverse!
156
+ end
157
+ end
158
+ end
159
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Helper class for breaking down batches into independent groups for
6
+ # processing
7
+ class BatchSlicer
8
+ # Split the batch into a series of independent slices. Each slice contains
9
+ # messages that can be processed in any order (i.e. they have distinct
10
+ # keys). Messages with the same key will be separated into different
11
+ # slices that maintain the correct order.
12
+ # E.g. Given messages A1, A2, B1, C1, C2, C3, they will be sliced as:
13
+ # [[A1, B1, C1], [A2, C2], [C3]]
14
+ def self.slice(messages)
15
+ ops = messages.group_by(&:key)
16
+
17
+ # Find maximum depth
18
+ depth = ops.values.map(&:length).max || 0
19
+
20
+ # Generate slices for each depth
21
+ depth.times.map do |i|
22
+ ops.values.map { |arr| arr.dig(i) }.compact
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Methods for consuming individual messages and saving them to the database
6
+ # as ActiveRecord instances.
7
+ module MessageConsumption
8
+ # Find the record specified by the given payload and key.
9
+ # Default is to use the primary key column and the value of the first
10
+ # field in the key.
11
+ # @param klass [Class < ActiveRecord::Base]
12
+ # @param _payload [Hash]
13
+ # @param key [Object]
14
+ # @return [ActiveRecord::Base]
15
+ def fetch_record(klass, _payload, key)
16
+ klass.unscoped.where(klass.primary_key => key).first
17
+ end
18
+
19
+ # Assign a key to a new record.
20
+ # @param record [ActiveRecord::Base]
21
+ # @param _payload [Hash]
22
+ # @param key [Object]
23
+ def assign_key(record, _payload, key)
24
+ record[record.class.primary_key] = key
25
+ end
26
+
27
+ # :nodoc:
28
+ def consume(payload, metadata)
29
+ key = metadata.with_indifferent_access[:key]
30
+ klass = self.class.config[:record_class]
31
+ record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
32
+ if payload.nil?
33
+ destroy_record(record)
34
+ return
35
+ end
36
+ if record.blank?
37
+ record = klass.new
38
+ assign_key(record, payload, key)
39
+ end
40
+ attrs = record_attributes(payload.with_indifferent_access, key)
41
+ # don't use attributes= - bypass Rails < 5 attr_protected
42
+ attrs.each do |k, v|
43
+ record.send("#{k}=", v)
44
+ end
45
+ record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
46
+ record.updated_at = Time.zone.now if record.respond_to?(:updated_at)
47
+ record.save!
48
+ end
49
+
50
+ # Destroy a record that received a null payload. Override if you need
51
+ # to do something other than a straight destroy (e.g. mark as archived).
52
+ # @param record [ActiveRecord::Base]
53
+ def destroy_record(record)
54
+ record&.destroy
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Convert a message with a schema to an ActiveRecord model
6
+ class SchemaModelConverter
7
+ # Create new converter
8
+ # @param decoder [SchemaBackends::Base] Incoming message schema.
9
+ # @param klass [ActiveRecord::Base] Model to map to.
10
+ def initialize(decoder, klass)
11
+ @decoder = decoder
12
+ @klass = klass
13
+ end
14
+
15
+ # Convert a message from a decoded hash to a set of ActiveRecord
16
+ # attributes. Attributes that don't exist in the model will be ignored.
17
+ # @param payload [Hash] Decoded message payload.
18
+ # @return [Hash] Model attributes.
19
+ def convert(payload)
20
+ attributes = {}
21
+ @decoder.schema_fields.each do |field|
22
+ column = @klass.columns.find { |c| c.name == field.name }
23
+ next if column.nil?
24
+ next if %w(updated_at created_at).include?(field.name)
25
+
26
+ attributes[field.name] = _coerce_field(column, payload[field.name])
27
+ end
28
+ attributes
29
+ end
30
+
31
+ private
32
+
33
+ # @param column [ActiveRecord::ConnectionAdapters::Column]
34
+ # @param val [Object]
35
+ def _coerce_field(column, val)
36
+ return nil if val.nil?
37
+
38
+ if column.type == :datetime
39
+ int_val = begin
40
+ val.is_a?(Integer) ? val : (val.is_a?(String) && Integer(val))
41
+ rescue StandardError
42
+ nil
43
+ end
44
+
45
+ return Time.zone.at(int_val) if int_val
46
+ end
47
+
48
+ val
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,101 +1,59 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'deimos/active_record_consume/batch_consumption'
4
+ require 'deimos/active_record_consume/message_consumption'
5
+ require 'deimos/active_record_consume/schema_model_converter'
3
6
  require 'deimos/consumer'
4
7
 
5
8
  module Deimos
6
- # Consumer that automatically saves the payload into the database.
9
+ # Basic ActiveRecord consumer class. Consumes messages and upserts them to
10
+ # the database. For tombstones (null payloads), deletes corresponding
11
+ # records from the database. Can operate in either message-by-message mode
12
+ # or in batch mode.
13
+ #
14
+ # In batch mode, ActiveRecord callbacks will be skipped and messages will
15
+ # be batched to minimize database calls.
16
+ #
17
+ # To configure batch vs. message mode, change the delivery mode of your
18
+ # Phobos listener.
19
+ # Message-by-message -> use `delivery: message` or `delivery: batch`
20
+ # Batch -> use `delivery: inline_batch`
7
21
  class ActiveRecordConsumer < Consumer
22
+ include ActiveRecordConsume::MessageConsumption
23
+ include ActiveRecordConsume::BatchConsumption
24
+
8
25
  class << self
9
26
  # @param klass [Class < ActiveRecord::Base] the class used to save to the
10
27
  # database.
11
28
  def record_class(klass)
12
29
  config[:record_class] = klass
13
30
  end
14
- end
15
31
 
16
- # Find the record specified by the given payload and key.
17
- # Default is to use the primary key column and the value of the first
18
- # field in the key.
19
- # @param klass [Class < ActiveRecord::Base]
20
- # @param _payload [Hash]
21
- # @param key [Object]
22
- # @return [ActiveRecord::Base]
23
- def fetch_record(klass, _payload, key)
24
- klass.unscoped.where(klass.primary_key => key).first
32
+ # @param val [Boolean] Turn pre-compaction of the batch on or off. If true,
33
+ # only the last message for each unique key in a batch is processed.
34
+ def compacted(val)
35
+ config[:compacted] = val
36
+ end
25
37
  end
26
38
 
27
- # Assign a key to a new record.
28
- # @param record [ActiveRecord::Base]
29
- # @param _payload [Hash]
30
- # @param key [Object]
31
- def assign_key(record, _payload, key)
32
- record[record.class.primary_key] = key
33
- end
39
+ # Setup
40
+ def initialize
41
+ @klass = self.class.config[:record_class]
42
+ @converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.decoder, @klass)
34
43
 
35
- # :nodoc:
36
- def consume(payload, metadata)
37
- key = metadata.with_indifferent_access[:key]
38
- klass = self.class.config[:record_class]
39
- record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
40
- if payload.nil?
41
- destroy_record(record)
42
- return
44
+ if self.class.config[:key_schema]
45
+ @key_converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.key_decoder, @klass)
43
46
  end
44
- if record.blank?
45
- record = klass.new
46
- assign_key(record, payload, key)
47
- end
48
- attrs = record_attributes(payload.with_indifferent_access)
49
- # don't use attributes= - bypass Rails < 5 attr_protected
50
- attrs.each do |k, v|
51
- record.send("#{k}=", v)
52
- end
53
- record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
54
- record.updated_at ||= Time.zone.now if record.respond_to?(:updated_at)
55
- record.save!
56
- end
57
47
 
58
- # Destroy a record that received a null payload. Override if you need
59
- # to do something other than a straight destroy (e.g. mark as archived).
60
- # @param record [ActiveRecord::Base]
61
- def destroy_record(record)
62
- record&.destroy
48
+ @compacted = self.class.config[:compacted] != false
63
49
  end
64
50
 
65
51
  # Override this method (with `super`) if you want to add/change the default
66
52
  # attributes set to the new/existing record.
67
53
  # @param payload [Hash]
68
- def record_attributes(payload)
69
- klass = self.class.config[:record_class]
70
- attributes = {}
71
- self.class.decoder.schema_fields.each do |field|
72
- column = klass.columns.find { |c| c.name == field.name }
73
- next if column.nil?
74
- next if %w(updated_at created_at).include?(field.name)
75
-
76
- attributes[field.name] = _coerce_field(column, payload[field.name])
77
- end
78
- attributes
79
- end
80
-
81
- private
82
-
83
- # @param column [ActiveRecord::ConnectionAdapters::Column]
84
- # @param val [Object]
85
- def _coerce_field(column, val)
86
- return nil if val.nil?
87
-
88
- if column.type == :datetime
89
- int_val = begin
90
- val.is_a?(Integer) ? val : (val.is_a?(String) && Integer(val))
91
- rescue StandardError
92
- nil
93
- end
94
-
95
- return Time.zone.at(int_val) if int_val
96
- end
97
-
98
- val
54
+ # @param _key [String]
55
+ def record_attributes(payload, _key=nil)
56
+ @converter.convert(payload)
99
57
  end
100
58
  end
101
59
  end
@@ -59,6 +59,29 @@ module Deimos
59
59
  k.to_sym != :payload_key && !fields.map(&:name).include?(k)
60
60
  end
61
61
  end
62
+
63
+ # Query to use when polling the database with the DbPoller. Add
64
+ # includes, joins, or wheres as necessary, or replace entirely.
65
+ # @param time_from [Time] the time to start the query from.
66
+ # @param time_to [Time] the time to end the query.
67
+ # @param column_name [Symbol] the column name to look for.
68
+ # @param min_id [Numeric] the minimum ID (i.e. all IDs must be greater
69
+ # than this value).
70
+ # @return [ActiveRecord::Relation]
71
+ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
72
+ klass = config[:record_class]
73
+ table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
74
+ column = ActiveRecord::Base.connection.quote_column_name(column_name)
75
+ primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
76
+ klass.where(
77
+ "((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
78
+ OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
79
+ time_from,
80
+ min_id,
81
+ time_from,
82
+ time_to
83
+ )
84
+ end
62
85
  end
63
86
  end
64
87
  end