deimos-ruby 1.6.3 → 1.8.1.pre.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +22 -16
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +42 -0
  6. data/Gemfile.lock +125 -98
  7. data/README.md +164 -16
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +4 -3
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +8 -7
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +150 -0
  22. data/lib/deimos/consume/message_consumption.rb +94 -0
  23. data/lib/deimos/consumer.rb +79 -70
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_topic_info.rb +22 -3
  26. data/lib/deimos/message.rb +6 -1
  27. data/lib/deimos/metrics/provider.rb +0 -2
  28. data/lib/deimos/poll_info.rb +9 -0
  29. data/lib/deimos/schema_backends/avro_base.rb +28 -1
  30. data/lib/deimos/schema_backends/base.rb +15 -2
  31. data/lib/deimos/tracing/provider.rb +0 -2
  32. data/lib/deimos/utils/db_poller.rb +149 -0
  33. data/lib/deimos/utils/db_producer.rb +59 -16
  34. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  35. data/lib/deimos/utils/lag_reporter.rb +19 -26
  36. data/lib/deimos/version.rb +1 -1
  37. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  38. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  39. data/lib/generators/deimos/active_record_generator.rb +79 -0
  40. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  41. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  42. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  43. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  44. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  45. data/lib/tasks/deimos.rake +7 -0
  46. data/spec/active_record_batch_consumer_spec.rb +481 -0
  47. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  48. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  49. data/spec/active_record_consumer_spec.rb +3 -11
  50. data/spec/active_record_producer_spec.rb +66 -88
  51. data/spec/batch_consumer_spec.rb +24 -7
  52. data/spec/config/configuration_spec.rb +4 -0
  53. data/spec/consumer_spec.rb +8 -8
  54. data/spec/deimos_spec.rb +57 -49
  55. data/spec/generators/active_record_generator_spec.rb +56 -0
  56. data/spec/handlers/my_batch_consumer.rb +6 -1
  57. data/spec/handlers/my_consumer.rb +6 -1
  58. data/spec/kafka_topic_info_spec.rb +39 -16
  59. data/spec/message_spec.rb +19 -0
  60. data/spec/producer_spec.rb +3 -3
  61. data/spec/rake_spec.rb +1 -1
  62. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  63. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  64. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  65. data/spec/spec_helper.rb +62 -6
  66. data/spec/utils/db_poller_spec.rb +320 -0
  67. data/spec/utils/db_producer_spec.rb +84 -10
  68. data/spec/utils/deadlock_retry_spec.rb +74 -0
  69. data/spec/utils/lag_reporter_spec.rb +29 -22
  70. metadata +66 -30
  71. data/lib/deimos/base_consumer.rb +0 -104
  72. data/lib/deimos/utils/executor.rb +0 -124
  73. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  74. data/lib/deimos/utils/signal_handler.rb +0 -68
  75. data/spec/utils/executor_spec.rb +0 -53
  76. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -59,6 +59,29 @@ module Deimos
59
59
  k.to_sym != :payload_key && !fields.map(&:name).include?(k)
60
60
  end
61
61
  end
62
+
63
+ # Query to use when polling the database with the DbPoller. Add
64
+ # includes, joins, or wheres as necessary, or replace entirely.
65
+ # @param time_from [Time] the time to start the query from.
66
+ # @param time_to [Time] the time to end the query.
67
+ # @param column_name [Symbol] the column name to look for.
68
+ # @param min_id [Numeric] the minimum ID (i.e. rows whose column value equals
69
+ # time_from must have an ID greater than this value).
70
+ # @return [ActiveRecord::Relation]
71
+ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
72
+ klass = config[:record_class]
73
+ table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
74
+ column = ActiveRecord::Base.connection.quote_column_name(column_name)
75
+ primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
76
+ klass.where(
77
+ "((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
78
+ OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
79
+ time_from,
80
+ min_id,
81
+ time_from,
82
+ time_to
83
+ )
84
+ end
62
85
  end
63
86
  end
64
87
  end
@@ -1,145 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'deimos/base_consumer'
4
- require 'phobos/batch_handler'
5
-
6
3
  module Deimos
7
- # Class to consume batches of messages in a topic
8
- # Note: According to the docs, instances of your handler will be created
9
- # for every incoming batch of messages. This class should be lightweight.
10
- class BatchConsumer < BaseConsumer
11
- include Phobos::BatchHandler
12
-
13
- # :nodoc:
14
- def around_consume_batch(batch, metadata)
15
- payloads = []
16
- benchmark = Benchmark.measure do
17
- if self.class.config[:key_configured]
18
- metadata[:keys] = batch.map do |message|
19
- decode_key(message.key)
20
- end
21
- end
22
-
23
- payloads = batch.map do |message|
24
- message.payload ? self.class.decoder.decode(message.payload) : nil
25
- end
26
- _received_batch(payloads, metadata)
27
- _with_error_span(payloads, metadata) do
28
- yield payloads, metadata
29
- end
30
- end
31
- _handle_success(benchmark.real, payloads, metadata)
32
- end
33
-
34
- # Consume a batch of incoming messages.
35
- # @param _payloads [Array<Phobos::BatchMessage>]
36
- # @param _metadata [Hash]
37
- def consume_batch(_payloads, _metadata)
38
- raise NotImplementedError
39
- end
40
-
41
- protected
42
-
43
- def _received_batch(payloads, metadata)
44
- Deimos.config.logger.info(
45
- message: 'Got Kafka batch event',
46
- message_ids: _payload_identifiers(payloads, metadata),
47
- metadata: metadata.except(:keys)
48
- )
49
- Deimos.config.logger.debug(
50
- message: 'Kafka batch event payloads',
51
- payloads: payloads
52
- )
53
- Deimos.config.metrics&.increment(
54
- 'handler',
55
- tags: %W(
56
- status:batch_received
57
- topic:#{metadata[:topic]}
58
- ))
59
- Deimos.config.metrics&.increment(
60
- 'handler',
61
- by: metadata['batch_size'],
62
- tags: %W(
63
- status:received
64
- topic:#{metadata[:topic]}
65
- ))
66
- if payloads.present?
67
- payloads.each { |payload| _report_time_delayed(payload, metadata) }
68
- end
69
- end
70
-
71
- # @param exception [Throwable]
72
- # @param payloads [Array<Hash>]
73
- # @param metadata [Hash]
74
- def _handle_error(exception, payloads, metadata)
75
- Deimos.config.metrics&.increment(
76
- 'handler',
77
- tags: %W(
78
- status:batch_error
79
- topic:#{metadata[:topic]}
80
- ))
81
- Deimos.config.logger.warn(
82
- message: 'Error consuming message batch',
83
- handler: self.class.name,
84
- metadata: metadata.except(:keys),
85
- message_ids: _payload_identifiers(payloads, metadata),
86
- error_message: exception.message,
87
- error: exception.backtrace
88
- )
89
- super
90
- end
91
-
92
- # @param time_taken [Float]
93
- # @param payloads [Array<Hash>]
94
- # @param metadata [Hash]
95
- def _handle_success(time_taken, payloads, metadata)
96
- Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
97
- time:consume_batch
98
- topic:#{metadata[:topic]}
99
- ))
100
- Deimos.config.metrics&.increment(
101
- 'handler',
102
- tags: %W(
103
- status:batch_success
104
- topic:#{metadata[:topic]}
105
- ))
106
- Deimos.config.metrics&.increment(
107
- 'handler',
108
- by: metadata['batch_size'],
109
- tags: %W(
110
- status:success
111
- topic:#{metadata[:topic]}
112
- ))
113
- Deimos.config.logger.info(
114
- message: 'Finished processing Kafka batch event',
115
- message_ids: _payload_identifiers(payloads, metadata),
116
- time_elapsed: time_taken,
117
- metadata: metadata.except(:keys)
118
- )
119
- end
120
-
121
- # Get payload identifiers (key and message_id if present) for logging.
122
- # @param payloads [Array<Hash>]
123
- # @param metadata [Hash]
124
- # @return [Hash] the identifiers.
125
- def _payload_identifiers(payloads, metadata)
126
- message_ids = payloads&.map do |payload|
127
- if payload.is_a?(Hash) && payload.key?('message_id')
128
- payload['message_id']
129
- end
130
- end
131
-
132
- # Payloads may be nil if preprocessing failed
133
- messages = payloads || metadata[:keys] || []
134
-
135
- messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id|
136
- ids = {}
137
-
138
- ids[:key] = k if k.present?
139
- ids[:message_id] = m_id if m_id.present?
140
-
141
- ids
142
- end
143
- end
4
+ # @deprecated Use Deimos::Consumer with `delivery: inline_batch` configured instead
5
+ class BatchConsumer < Consumer
144
6
  end
145
7
  end
@@ -47,17 +47,15 @@ module Deimos
47
47
  handler_class = listener.handler.constantize
48
48
  delivery = listener.delivery
49
49
 
50
- # Validate that Deimos consumers use proper delivery configs
51
- if handler_class < Deimos::BatchConsumer
52
- unless delivery == 'inline_batch'
53
- raise "BatchConsumer #{listener.handler} must have delivery set to"\
54
- ' `inline_batch`'
55
- end
56
- elsif handler_class < Deimos::Consumer
57
- if delivery.present? && !%w(message batch).include?(delivery)
58
- raise "Non-batch Consumer #{listener.handler} must have delivery"\
59
- ' set to `message` or `batch`'
50
+ next unless handler_class < Deimos::Consumer
51
+
52
+ # Validate that each consumer implements the correct method for its type
53
+ if delivery == 'inline_batch'
54
+ if handler_class.instance_method(:consume_batch).owner == Deimos::Consume::BatchConsumption
55
+ raise "BatchConsumer #{listener.handler} does not implement `consume_batch`"
60
56
  end
57
+ elsif handler_class.instance_method(:consume).owner == Deimos::Consume::MessageConsumption
58
+ raise "Non-batch Consumer #{listener.handler} does not implement `consume`"
61
59
  end
62
60
  end
63
61
  end
@@ -340,6 +338,26 @@ module Deimos
340
338
  setting :heartbeat_interval
341
339
  end
342
340
 
341
+ setting_object :db_poller do
342
+ # Producer class to use for the poller.
343
+ setting :producer_class
344
+ # How often to run the poller, in seconds. If a poll takes longer than this
345
+ # interval, the next poll runs immediately and the one after it is pushed
346
+ # back to the following interval (e.g. the next 1 minute).
347
+ setting :run_every, 60
348
+ # Column to use to find updates. Must have an index on it.
349
+ setting :timestamp_column, :updated_at
350
+ # Amount of time, in seconds, to wait before catching updates, to allow transactions
351
+ # to complete but still pick up the right records.
352
+ setting :delay_time, 2
353
+ # If true, dump the full table rather than incremental changes. Should
354
+ # only be used for very small tables.
355
+ setting :full_table, false
356
+ # If false, start from the current time instead of the beginning of time
357
+ # if this is the first time running the poller.
358
+ setting :start_from_beginning, true
359
+ end
360
+
343
361
  deprecate 'kafka_logger', 'kafka.logger'
344
362
  deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
345
363
  deprecate 'schema_registry_url', 'schema.registry_url'
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Consume
5
+ # Helper methods used by batch consumers, i.e. those with "inline_batch"
6
+ # delivery. Payloads are decoded, then consumers are invoked with arrays
7
+ # of messages to be handled at once.
8
+ module BatchConsumption
9
+ extend ActiveSupport::Concern
10
+ include Phobos::BatchHandler
11
+
12
+ # :nodoc:
13
+ def around_consume_batch(batch, metadata)
14
+ payloads = []
15
+ benchmark = Benchmark.measure do
16
+ if self.class.config[:key_configured]
17
+ metadata[:keys] = batch.map do |message|
18
+ decode_key(message.key)
19
+ end
20
+ end
21
+ metadata[:first_offset] = batch.first&.offset
22
+
23
+ payloads = batch.map do |message|
24
+ message.payload ? self.class.decoder.decode(message.payload) : nil
25
+ end
26
+ _received_batch(payloads, metadata)
27
+ _with_span do
28
+ yield payloads, metadata
29
+ end
30
+ end
31
+ _handle_batch_success(benchmark.real, payloads, metadata)
32
+ rescue StandardError => e
33
+ _handle_batch_error(e, payloads, metadata)
34
+ end
35
+
36
+ # Consume a batch of incoming messages.
37
+ # @param _payloads [Array<Hash, nil>] the decoded payloads (nil for messages without a payload)
38
+ # @param _metadata [Hash]
39
+ def consume_batch(_payloads, _metadata)
40
+ raise NotImplementedError
41
+ end
42
+
43
+ protected
44
+
45
+ def _received_batch(payloads, metadata)
46
+ Deimos.config.logger.info(
47
+ message: 'Got Kafka batch event',
48
+ message_ids: _payload_identifiers(payloads, metadata),
49
+ metadata: metadata.except(:keys)
50
+ )
51
+ Deimos.config.logger.debug(
52
+ message: 'Kafka batch event payloads',
53
+ payloads: payloads
54
+ )
55
+ Deimos.config.metrics&.increment(
56
+ 'handler',
57
+ tags: %W(
58
+ status:batch_received
59
+ topic:#{metadata[:topic]}
60
+ ))
61
+ Deimos.config.metrics&.increment(
62
+ 'handler',
63
+ by: metadata['batch_size'],
64
+ tags: %W(
65
+ status:received
66
+ topic:#{metadata[:topic]}
67
+ ))
68
+ if payloads.present?
69
+ payloads.each { |payload| _report_time_delayed(payload, metadata) }
70
+ end
71
+ end
72
+
73
+ # @param exception [Throwable]
74
+ # @param payloads [Array<Hash>]
75
+ # @param metadata [Hash]
76
+ def _handle_batch_error(exception, payloads, metadata)
77
+ Deimos.config.metrics&.increment(
78
+ 'handler',
79
+ tags: %W(
80
+ status:batch_error
81
+ topic:#{metadata[:topic]}
82
+ ))
83
+ Deimos.config.logger.warn(
84
+ message: 'Error consuming message batch',
85
+ handler: self.class.name,
86
+ metadata: metadata.except(:keys),
87
+ message_ids: _payload_identifiers(payloads, metadata),
88
+ error_message: exception.message,
89
+ error: exception.backtrace
90
+ )
91
+ _error(exception, payloads, metadata)
92
+ end
93
+
94
+ # @param time_taken [Float]
95
+ # @param payloads [Array<Hash>]
96
+ # @param metadata [Hash]
97
+ def _handle_batch_success(time_taken, payloads, metadata)
98
+ Deimos.config.metrics&.histogram('handler',
99
+ time_taken,
100
+ tags: %W(
101
+ time:consume_batch
102
+ topic:#{metadata[:topic]}
103
+ ))
104
+ Deimos.config.metrics&.increment(
105
+ 'handler',
106
+ tags: %W(
107
+ status:batch_success
108
+ topic:#{metadata[:topic]}
109
+ ))
110
+ Deimos.config.metrics&.increment(
111
+ 'handler',
112
+ by: metadata['batch_size'],
113
+ tags: %W(
114
+ status:success
115
+ topic:#{metadata[:topic]}
116
+ ))
117
+ Deimos.config.logger.info(
118
+ message: 'Finished processing Kafka batch event',
119
+ message_ids: _payload_identifiers(payloads, metadata),
120
+ time_elapsed: time_taken,
121
+ metadata: metadata.except(:keys)
122
+ )
123
+ end
124
+
125
+ # Get payload identifiers (key and message_id if present) for logging.
126
+ # @param payloads [Array<Hash>]
127
+ # @param metadata [Hash]
128
+ # @return [Array<Hash>] the identifiers, one hash per message.
129
+ def _payload_identifiers(payloads, metadata)
130
+ message_ids = payloads&.map do |payload|
131
+ if payload.is_a?(Hash) && payload.key?('message_id')
132
+ payload['message_id']
133
+ end
134
+ end
135
+
136
+ # Payloads may be nil if preprocessing failed
137
+ messages = payloads || metadata[:keys] || []
138
+
139
+ messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id|
140
+ ids = {}
141
+
142
+ ids[:key] = k if k.present?
143
+ ids[:message_id] = m_id if m_id.present?
144
+
145
+ ids
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Consume
5
+ # Methods used by message-by-message (non-batch) consumers. These consumers
6
+ # are invoked for every individual message.
7
+ module MessageConsumption
8
+ extend ActiveSupport::Concern
9
+ include Phobos::Handler
10
+
11
+ # :nodoc:
12
+ def around_consume(payload, metadata)
13
+ decoded_payload = payload.dup
14
+ new_metadata = metadata.dup
15
+ benchmark = Benchmark.measure do
16
+ _with_span do
17
+ new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
18
+ decoded_payload = payload ? self.class.decoder.decode(payload) : nil
19
+ _received_message(decoded_payload, new_metadata)
20
+ yield decoded_payload, new_metadata
21
+ end
22
+ end
23
+ _handle_success(benchmark.real, decoded_payload, new_metadata)
24
+ rescue StandardError => e
25
+ _handle_error(e, decoded_payload, new_metadata)
26
+ end
27
+
28
+ # Consume incoming messages.
29
+ # @param _payload [Hash, nil] the decoded payload (nil if the message had no payload)
30
+ # @param _metadata [Hash]
31
+ def consume(_payload, _metadata)
32
+ raise NotImplementedError
33
+ end
34
+
35
+ private
36
+
37
+ def _received_message(payload, metadata)
38
+ Deimos.config.logger.info(
39
+ message: 'Got Kafka event',
40
+ payload: payload,
41
+ metadata: metadata
42
+ )
43
+ Deimos.config.metrics&.increment('handler', tags: %W(
44
+ status:received
45
+ topic:#{metadata[:topic]}
46
+ ))
47
+ _report_time_delayed(payload, metadata)
48
+ end
49
+
50
+ # @param exception [Throwable]
51
+ # @param payload [Hash]
52
+ # @param metadata [Hash]
53
+ def _handle_error(exception, payload, metadata)
54
+ Deimos.config.metrics&.increment(
55
+ 'handler',
56
+ tags: %W(
57
+ status:error
58
+ topic:#{metadata[:topic]}
59
+ )
60
+ )
61
+ Deimos.config.logger.warn(
62
+ message: 'Error consuming message',
63
+ handler: self.class.name,
64
+ metadata: metadata,
65
+ data: payload,
66
+ error_message: exception.message,
67
+ error: exception.backtrace
68
+ )
69
+
70
+ _error(exception, payload, metadata)
71
+ end
72
+
73
+ # @param time_taken [Float]
74
+ # @param payload [Hash]
75
+ # @param metadata [Hash]
76
+ def _handle_success(time_taken, payload, metadata)
77
+ Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
78
+ time:consume
79
+ topic:#{metadata[:topic]}
80
+ ))
81
+ Deimos.config.metrics&.increment('handler', tags: %W(
82
+ status:success
83
+ topic:#{metadata[:topic]}
84
+ ))
85
+ Deimos.config.logger.info(
86
+ message: 'Finished processing Kafka event',
87
+ payload: payload,
88
+ time_elapsed: time_taken,
89
+ metadata: metadata
90
+ )
91
+ end
92
+ end
93
+ end
94
+ end