deimos-ruby 1.6.1 → 1.8.0.pre.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +30 -0
  6. data/Gemfile.lock +87 -80
  7. data/README.md +139 -15
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +3 -2
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +148 -0
  22. data/lib/deimos/consume/message_consumption.rb +93 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_source.rb +29 -23
  26. data/lib/deimos/kafka_topic_info.rb +1 -1
  27. data/lib/deimos/message.rb +6 -1
  28. data/lib/deimos/metrics/provider.rb +0 -2
  29. data/lib/deimos/poll_info.rb +9 -0
  30. data/lib/deimos/tracing/provider.rb +0 -2
  31. data/lib/deimos/utils/db_poller.rb +149 -0
  32. data/lib/deimos/utils/db_producer.rb +8 -3
  33. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  34. data/lib/deimos/utils/lag_reporter.rb +19 -26
  35. data/lib/deimos/version.rb +1 -1
  36. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  37. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  38. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  39. data/lib/tasks/deimos.rake +7 -0
  40. data/spec/active_record_batch_consumer_spec.rb +481 -0
  41. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  42. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  43. data/spec/active_record_consumer_spec.rb +22 -11
  44. data/spec/active_record_producer_spec.rb +66 -88
  45. data/spec/batch_consumer_spec.rb +23 -7
  46. data/spec/config/configuration_spec.rb +4 -0
  47. data/spec/consumer_spec.rb +8 -8
  48. data/spec/deimos_spec.rb +57 -49
  49. data/spec/handlers/my_batch_consumer.rb +6 -1
  50. data/spec/handlers/my_consumer.rb +6 -1
  51. data/spec/kafka_source_spec.rb +53 -0
  52. data/spec/message_spec.rb +19 -0
  53. data/spec/producer_spec.rb +3 -3
  54. data/spec/rake_spec.rb +1 -1
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/spec_helper.rb +61 -6
  58. data/spec/utils/db_poller_spec.rb +320 -0
  59. data/spec/utils/deadlock_retry_spec.rb +74 -0
  60. data/spec/utils/lag_reporter_spec.rb +29 -22
  61. metadata +61 -20
  62. data/lib/deimos/base_consumer.rb +0 -104
  63. data/lib/deimos/utils/executor.rb +0 -124
  64. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  65. data/lib/deimos/utils/signal_handler.rb +0 -68
  66. data/spec/utils/executor_spec.rb +0 -53
  67. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,159 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/active_record_consume/batch_slicer'
4
+ require 'deimos/utils/deadlock_retry'
5
+ require 'deimos/message'
6
+
7
+ module Deimos
8
+ module ActiveRecordConsume
9
+ # Methods for consuming batches of messages and saving them to the database
10
+ # in bulk ActiveRecord operations.
11
+ module BatchConsumption
12
+ # Handle a batch of Kafka messages. Batches are split into "slices",
13
+ # which are groups of independent messages that can be processed together
14
+ # in a single database operation.
15
+ # If two messages in a batch have the same key, we cannot process them
16
+ # in the same operation as they would interfere with each other. Thus
17
+ # they are split into separate slices unless the batch is compacted first.
18
+ # @param payloads [Array<Hash>] Decoded payloads.
19
+ # @param metadata [Hash] Information about batch, including keys.
20
+ def consume_batch(payloads, metadata)
21
+ messages = payloads.
22
+ zip(metadata[:keys]).
23
+ map { |p, k| Deimos::Message.new(p, nil, key: k) }
24
+
25
+ tags = %W(topic:#{metadata[:topic]})
26
+
27
+ Deimos.instrument('ar_consumer.consume_batch', tags) do
28
+ # The entire batch should be treated as one transaction so that if
29
+ # any message fails, the whole thing is rolled back or retried
30
+ # if there is a deadlock.
31
+ Deimos::Utils::DeadlockRetry.wrap(tags) do
32
+ if @compacted || self.class.config[:no_keys]
33
+ update_database(compact_messages(messages))
34
+ else
35
+ uncompacted_update(messages)
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ # Get unique key for the ActiveRecord instance from the incoming key.
42
+ # Override this method (with super) to customize the set of attributes that
43
+ # uniquely identifies each record in the database.
44
+ # @param key [String] The encoded key.
45
+ # @return [Hash] The key attributes.
46
+ def record_key(key)
47
+ decoded_key = decode_key(key)
48
+
49
+ if decoded_key.nil?
50
+ {}
51
+ elsif decoded_key.is_a?(Hash)
52
+ @key_converter.convert(decoded_key)
53
+ else
54
+ { @klass.primary_key => decoded_key }
55
+ end
56
+ end
57
+
58
+ protected
59
+
60
+ # Perform database operations for a batch of messages without compaction.
61
+ # All messages are split into slices containing only unique keys, and
62
+ # each slice is handled as its own batch.
63
+ # @param messages [Array<Message>] List of messages.
64
+ def uncompacted_update(messages)
65
+ BatchSlicer.
66
+ slice(messages).
67
+ each(&method(:update_database))
68
+ end
69
+
70
+ # Perform database operations for a group of messages.
71
+ # All messages with payloads are passed to upsert_records.
72
+ # All tombstone messages are passed to remove_records.
73
+ # @param messages [Array<Message>] List of messages.
74
+ def update_database(messages)
75
+ # Find all upserted records (i.e. that have a payload) and all
76
+ # deleted records (no payload)
77
+ removed, upserted = messages.partition(&:tombstone?)
78
+
79
+ upsert_records(upserted) if upserted.any?
80
+ remove_records(removed) if removed.any?
81
+ end
82
+
83
+ # Upsert any non-deleted records
84
+ # @param messages [Array<Message>] List of messages for a group of
85
+ # records to either be updated or inserted.
86
+ def upsert_records(messages)
87
+ key_cols = key_columns(messages)
88
+
89
+ # Create payloads with payload + key attributes
90
+ upserts = messages.map do |m|
91
+ record_attributes(m.payload, m.key)&.
92
+ merge(record_key(m.key))
93
+ end
94
+
95
+ # If overridden record_attributes indicated no record, skip
96
+ upserts.compact!
97
+
98
+ options = if key_cols.empty?
99
+ {} # Can't upsert with no key, just do regular insert
100
+ else
101
+ {
102
+ on_duplicate_key_update: {
103
+ # conflict_target must explicitly list the columns for
104
+ # Postgres and SQLite. Not required for MySQL, but this
105
+ # ensures consistent behaviour.
106
+ conflict_target: key_cols,
107
+ columns: :all
108
+ }
109
+ }
110
+ end
111
+
112
+ @klass.import!(upserts, options)
113
+ end
114
+
115
+ # Delete any records with a tombstone.
116
+ # @param messages [Array<Message>] List of messages for a group of
117
+ # deleted records.
118
+ def remove_records(messages)
119
+ clause = deleted_query(messages)
120
+
121
+ clause.delete_all
122
+ end
123
+
124
+ # Create an ActiveRecord relation that matches all of the passed
125
+ # records. Used for bulk deletion.
126
+ # @param records [Array<Message>] List of messages.
127
+ # @return [ActiveRecord::Relation] Matching relation.
128
+ def deleted_query(records)
129
+ keys = records.
130
+ map { |m| record_key(m.key)[@klass.primary_key] }.
131
+ reject(&:nil?)
132
+
133
+ @klass.unscoped.where(@klass.primary_key => keys)
134
+ end
135
+
136
+ # Get the set of attribute names that uniquely identify messages in the
137
+ # batch. Requires at least one record.
138
+ # @param records [Array<Message>] Non-empty list of messages.
139
+ # @return [Array<String>] List of attribute names.
140
+ # @raise [RuntimeError] If records is empty.
141
+ def key_columns(records)
142
+ raise 'Cannot determine key from empty batch' if records.empty?
143
+
144
+ first_key = records.first.key
145
+ record_key(first_key).keys
146
+ end
147
+
148
+ # Compact a batch of messages, taking only the last message for each
149
+ # unique key.
150
+ # @param batch [Array<Message>] Batch of messages.
151
+ # @return [Array<Message>] Compacted batch.
152
+ def compact_messages(batch)
153
+ return batch unless batch.first&.key.present?
154
+
155
+ batch.reverse.uniq(&:key).reverse!
156
+ end
157
+ end
158
+ end
159
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Helper class for breaking down batches into independent groups for
6
+ # processing
7
+ class BatchSlicer
8
+ # Split the batch into a series of independent slices. Each slice contains
9
+ # messages that can be processed in any order (i.e. they have distinct
10
+ # keys). Messages with the same key will be separated into different
11
+ # slices that maintain the correct order.
12
+ # E.g. Given messages A1, A2, B1, C1, C2, C3, they will be sliced as:
13
+ # [[A1, B1, C1], [A2, C2], [C3]]
14
+ def self.slice(messages)
15
+ ops = messages.group_by(&:key)
16
+
17
+ # Find maximum depth
18
+ depth = ops.values.map(&:length).max || 0
19
+
20
+ # Generate slices for each depth
21
+ depth.times.map do |i|
22
+ ops.values.map { |arr| arr.dig(i) }.compact
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Methods for consuming individual messages and saving them to the database
6
+ # as ActiveRecord instances.
7
+ module MessageConsumption
8
+ # Find the record specified by the given payload and key.
9
+ # Default is to use the primary key column and the value of the first
10
+ # field in the key.
11
+ # @param klass [Class < ActiveRecord::Base]
12
+ # @param _payload [Hash]
13
+ # @param key [Object]
14
+ # @return [ActiveRecord::Base]
15
+ def fetch_record(klass, _payload, key)
16
+ klass.unscoped.where(klass.primary_key => key).first
17
+ end
18
+
19
+ # Assign a key to a new record.
20
+ # @param record [ActiveRecord::Base]
21
+ # @param _payload [Hash]
22
+ # @param key [Object]
23
+ def assign_key(record, _payload, key)
24
+ record[record.class.primary_key] = key
25
+ end
26
+
27
+ # :nodoc:
28
+ def consume(payload, metadata)
29
+ key = metadata.with_indifferent_access[:key]
30
+ klass = self.class.config[:record_class]
31
+ record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
32
+ if payload.nil?
33
+ destroy_record(record)
34
+ return
35
+ end
36
+ if record.blank?
37
+ record = klass.new
38
+ assign_key(record, payload, key)
39
+ end
40
+ attrs = record_attributes(payload.with_indifferent_access, key)
41
+ # don't use attributes= - bypass Rails < 5 attr_protected
42
+ attrs.each do |k, v|
43
+ record.send("#{k}=", v)
44
+ end
45
+ record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
46
+ record.updated_at = Time.zone.now if record.respond_to?(:updated_at)
47
+ record.save!
48
+ end
49
+
50
+ # Destroy a record that received a null payload. Override if you need
51
+ # to do something other than a straight destroy (e.g. mark as archived).
52
+ # @param record [ActiveRecord::Base]
53
+ def destroy_record(record)
54
+ record&.destroy
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module ActiveRecordConsume
5
+ # Convert a message with a schema to an ActiveRecord model
6
+ class SchemaModelConverter
7
+ # Create new converter
8
+ # @param decoder [SchemaBackends::Base] Incoming message schema.
9
+ # @param klass [ActiveRecord::Base] Model to map to.
10
+ def initialize(decoder, klass)
11
+ @decoder = decoder
12
+ @klass = klass
13
+ end
14
+
15
+ # Convert a message from a decoded hash to a set of ActiveRecord
16
+ # attributes. Attributes that don't exist in the model will be ignored.
17
+ # @param payload [Hash] Decoded message payload.
18
+ # @return [Hash] Model attributes.
19
+ def convert(payload)
20
+ attributes = {}
21
+ @decoder.schema_fields.each do |field|
22
+ column = @klass.columns.find { |c| c.name == field.name }
23
+ next if column.nil?
24
+ next if %w(updated_at created_at).include?(field.name)
25
+
26
+ attributes[field.name] = _coerce_field(column, payload[field.name])
27
+ end
28
+ attributes
29
+ end
30
+
31
+ private
32
+
33
+ # @param column [ActiveRecord::ConnectionAdapters::Column]
34
+ # @param val [Object]
35
+ def _coerce_field(column, val)
36
+ return nil if val.nil?
37
+
38
+ if column.type == :datetime
39
+ int_val = begin
40
+ val.is_a?(Integer) ? val : (val.is_a?(String) && Integer(val))
41
+ rescue StandardError
42
+ nil
43
+ end
44
+
45
+ return Time.zone.at(int_val) if int_val
46
+ end
47
+
48
+ val
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,101 +1,59 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'deimos/active_record_consume/batch_consumption'
4
+ require 'deimos/active_record_consume/message_consumption'
5
+ require 'deimos/active_record_consume/schema_model_converter'
3
6
  require 'deimos/consumer'
4
7
 
5
8
  module Deimos
6
- # Consumer that automatically saves the payload into the database.
9
+ # Basic ActiveRecord consumer class. Consumes messages and upserts them to
10
+ # the database. For tombstones (null payloads), deletes corresponding
11
+ # records from the database. Can operate in either message-by-message mode
12
+ # or in batch mode.
13
+ #
14
+ # In batch mode, ActiveRecord callbacks will be skipped and messages will
15
+ # be batched to minimize database calls.
16
+ #
17
+ # To configure batch vs. message mode, change the delivery mode of your
18
+ # Phobos listener.
19
+ # Message-by-message -> use `delivery: message` or `delivery: batch`
20
+ # Batch -> use `delivery: inline_batch`
7
21
  class ActiveRecordConsumer < Consumer
22
+ include ActiveRecordConsume::MessageConsumption
23
+ include ActiveRecordConsume::BatchConsumption
24
+
8
25
  class << self
9
26
  # @param klass [Class < ActiveRecord::Base] the class used to save to the
10
27
  # database.
11
28
  def record_class(klass)
12
29
  config[:record_class] = klass
13
30
  end
14
- end
15
31
 
16
- # Find the record specified by the given payload and key.
17
- # Default is to use the primary key column and the value of the first
18
- # field in the key.
19
- # @param klass [Class < ActiveRecord::Base]
20
- # @param _payload [Hash]
21
- # @param key [Object]
22
- # @return [ActiveRecord::Base]
23
- def fetch_record(klass, _payload, key)
24
- klass.unscoped.where(klass.primary_key => key).first
32
+ # @param val [Boolean] Turn pre-compaction of the batch on or off. If true,
33
+ # only the last message for each unique key in a batch is processed.
34
+ def compacted(val)
35
+ config[:compacted] = val
36
+ end
25
37
  end
26
38
 
27
- # Assign a key to a new record.
28
- # @param record [ActiveRecord::Base]
29
- # @param _payload [Hash]
30
- # @param key [Object]
31
- def assign_key(record, _payload, key)
32
- record[record.class.primary_key] = key
33
- end
39
+ # Setup
40
+ def initialize
41
+ @klass = self.class.config[:record_class]
42
+ @converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.decoder, @klass)
34
43
 
35
- # :nodoc:
36
- def consume(payload, metadata)
37
- key = metadata.with_indifferent_access[:key]
38
- klass = self.class.config[:record_class]
39
- record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
40
- if payload.nil?
41
- destroy_record(record)
42
- return
44
+ if self.class.config[:key_schema]
45
+ @key_converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.key_decoder, @klass)
43
46
  end
44
- if record.blank?
45
- record = klass.new
46
- assign_key(record, payload, key)
47
- end
48
- attrs = record_attributes(payload.with_indifferent_access)
49
- # don't use attributes= - bypass Rails < 5 attr_protected
50
- attrs.each do |k, v|
51
- record.send("#{k}=", v)
52
- end
53
- record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
54
- record.updated_at ||= Time.zone.now if record.respond_to?(:updated_at)
55
- record.save!
56
- end
57
47
 
58
- # Destroy a record that received a null payload. Override if you need
59
- # to do something other than a straight destroy (e.g. mark as archived).
60
- # @param record [ActiveRecord::Base]
61
- def destroy_record(record)
62
- record&.destroy
48
+ @compacted = self.class.config[:compacted] != false
63
49
  end
64
50
 
65
51
  # Override this method (with `super`) if you want to add/change the default
66
52
  # attributes set to the new/existing record.
67
53
  # @param payload [Hash]
68
- def record_attributes(payload)
69
- klass = self.class.config[:record_class]
70
- attributes = {}
71
- self.class.decoder.schema_fields.each do |field|
72
- column = klass.columns.find { |c| c.name == field.name }
73
- next if column.nil?
74
- next if %w(updated_at created_at).include?(field.name)
75
-
76
- attributes[field.name] = _coerce_field(column, payload[field.name])
77
- end
78
- attributes
79
- end
80
-
81
- private
82
-
83
- # @param column [ActiveRecord::ConnectionAdapters::Column]
84
- # @param val [Object]
85
- def _coerce_field(column, val)
86
- return nil if val.nil?
87
-
88
- if column.type == :datetime
89
- int_val = begin
90
- val.is_a?(Integer) ? val : (val.is_a?(String) && Integer(val))
91
- rescue StandardError
92
- nil
93
- end
94
-
95
- return Time.zone.at(int_val) if int_val
96
- end
97
-
98
- val
54
+ # @param _key [String]
55
+ def record_attributes(payload, _key=nil)
56
+ @converter.convert(payload)
99
57
  end
100
58
  end
101
59
  end
@@ -59,6 +59,29 @@ module Deimos
59
59
  k.to_sym != :payload_key && !fields.map(&:name).include?(k)
60
60
  end
61
61
  end
62
+
63
+ # Query to use when polling the database with the DbPoller. Add
64
+ # includes, joins, or wheres as necessary, or replace entirely.
65
+ # @param time_from [Time] the time to start the query from.
66
+ # @param time_to [Time] the time to end the query.
67
+ # @param column_name [Symbol] the column name to look for.
68
+ # @param min_id [Numeric] the minimum ID (i.e. all IDs must be greater
69
+ # than this value).
70
+ # @return [ActiveRecord::Relation]
71
+ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
72
+ klass = config[:record_class]
73
+ table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
74
+ column = ActiveRecord::Base.connection.quote_column_name(column_name)
75
+ primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
76
+ klass.where(
77
+ "((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
78
+ OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
79
+ time_from,
80
+ min_id,
81
+ time_from,
82
+ time_to
83
+ )
84
+ end
62
85
  end
63
86
  end
64
87
  end