deimos-ruby 1.6.3 → 1.8.1.pre.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +22 -16
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +42 -0
  6. data/Gemfile.lock +125 -98
  7. data/README.md +164 -16
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +4 -3
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +8 -7
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +150 -0
  22. data/lib/deimos/consume/message_consumption.rb +94 -0
  23. data/lib/deimos/consumer.rb +79 -70
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_topic_info.rb +22 -3
  26. data/lib/deimos/message.rb +6 -1
  27. data/lib/deimos/metrics/provider.rb +0 -2
  28. data/lib/deimos/poll_info.rb +9 -0
  29. data/lib/deimos/schema_backends/avro_base.rb +28 -1
  30. data/lib/deimos/schema_backends/base.rb +15 -2
  31. data/lib/deimos/tracing/provider.rb +0 -2
  32. data/lib/deimos/utils/db_poller.rb +149 -0
  33. data/lib/deimos/utils/db_producer.rb +59 -16
  34. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  35. data/lib/deimos/utils/lag_reporter.rb +19 -26
  36. data/lib/deimos/version.rb +1 -1
  37. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  38. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  39. data/lib/generators/deimos/active_record_generator.rb +79 -0
  40. data/lib/generators/deimos/db_backend/templates/migration +1 -0
  41. data/lib/generators/deimos/db_backend/templates/rails3_migration +1 -0
  42. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  43. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  44. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  45. data/lib/tasks/deimos.rake +7 -0
  46. data/spec/active_record_batch_consumer_spec.rb +481 -0
  47. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  48. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  49. data/spec/active_record_consumer_spec.rb +3 -11
  50. data/spec/active_record_producer_spec.rb +66 -88
  51. data/spec/batch_consumer_spec.rb +24 -7
  52. data/spec/config/configuration_spec.rb +4 -0
  53. data/spec/consumer_spec.rb +8 -8
  54. data/spec/deimos_spec.rb +57 -49
  55. data/spec/generators/active_record_generator_spec.rb +56 -0
  56. data/spec/handlers/my_batch_consumer.rb +6 -1
  57. data/spec/handlers/my_consumer.rb +6 -1
  58. data/spec/kafka_topic_info_spec.rb +39 -16
  59. data/spec/message_spec.rb +19 -0
  60. data/spec/producer_spec.rb +3 -3
  61. data/spec/rake_spec.rb +1 -1
  62. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  63. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  64. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  65. data/spec/spec_helper.rb +62 -6
  66. data/spec/utils/db_poller_spec.rb +320 -0
  67. data/spec/utils/db_producer_spec.rb +84 -10
  68. data/spec/utils/deadlock_retry_spec.rb +74 -0
  69. data/spec/utils/lag_reporter_spec.rb +29 -22
  70. metadata +66 -30
  71. data/lib/deimos/base_consumer.rb +0 -104
  72. data/lib/deimos/utils/executor.rb +0 -124
  73. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  74. data/lib/deimos/utils/signal_handler.rb +0 -68
  75. data/spec/utils/executor_spec.rb +0 -53
  76. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -59,6 +59,29 @@ module Deimos
59
59
  k.to_sym != :payload_key && !fields.map(&:name).include?(k)
60
60
  end
61
61
  end
62
+
63
+ # Query to use when polling the database with the DbPoller. Add
64
+ # includes, joins, or wheres as necessary, or replace entirely.
65
+ # @param time_from [Time] the time to start the query from.
66
+ # @param time_to [Time] the time to end the query.
67
+ # @param column_name [Symbol] the column name to look for.
68
+ # @param min_id [Numeric] the minimum ID (i.e. rows whose timestamp equals
69
+ # time_from must have an ID greater than this value).
70
+ # @return [ActiveRecord::Relation]
71
+ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
72
+ klass = config[:record_class]
73
+ table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
74
+ column = ActiveRecord::Base.connection.quote_column_name(column_name)
75
+ primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
76
+ klass.where(
77
+ "((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
78
+ OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
79
+ time_from,
80
+ min_id,
81
+ time_from,
82
+ time_to
83
+ )
84
+ end
62
85
  end
63
86
  end
64
87
  end
@@ -1,145 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'deimos/base_consumer'
4
- require 'phobos/batch_handler'
5
-
6
3
  module Deimos
7
- # Class to consume batches of messages in a topic
8
- # Note: According to the docs, instances of your handler will be created
9
- # for every incoming batch of messages. This class should be lightweight.
10
- class BatchConsumer < BaseConsumer
11
- include Phobos::BatchHandler
12
-
13
- # :nodoc:
14
- def around_consume_batch(batch, metadata)
15
- payloads = []
16
- benchmark = Benchmark.measure do
17
- if self.class.config[:key_configured]
18
- metadata[:keys] = batch.map do |message|
19
- decode_key(message.key)
20
- end
21
- end
22
-
23
- payloads = batch.map do |message|
24
- message.payload ? self.class.decoder.decode(message.payload) : nil
25
- end
26
- _received_batch(payloads, metadata)
27
- _with_error_span(payloads, metadata) do
28
- yield payloads, metadata
29
- end
30
- end
31
- _handle_success(benchmark.real, payloads, metadata)
32
- end
33
-
34
- # Consume a batch of incoming messages.
35
- # @param _payloads [Array<Phobos::BatchMessage>]
36
- # @param _metadata [Hash]
37
- def consume_batch(_payloads, _metadata)
38
- raise NotImplementedError
39
- end
40
-
41
- protected
42
-
43
- def _received_batch(payloads, metadata)
44
- Deimos.config.logger.info(
45
- message: 'Got Kafka batch event',
46
- message_ids: _payload_identifiers(payloads, metadata),
47
- metadata: metadata.except(:keys)
48
- )
49
- Deimos.config.logger.debug(
50
- message: 'Kafka batch event payloads',
51
- payloads: payloads
52
- )
53
- Deimos.config.metrics&.increment(
54
- 'handler',
55
- tags: %W(
56
- status:batch_received
57
- topic:#{metadata[:topic]}
58
- ))
59
- Deimos.config.metrics&.increment(
60
- 'handler',
61
- by: metadata['batch_size'],
62
- tags: %W(
63
- status:received
64
- topic:#{metadata[:topic]}
65
- ))
66
- if payloads.present?
67
- payloads.each { |payload| _report_time_delayed(payload, metadata) }
68
- end
69
- end
70
-
71
- # @param exception [Throwable]
72
- # @param payloads [Array<Hash>]
73
- # @param metadata [Hash]
74
- def _handle_error(exception, payloads, metadata)
75
- Deimos.config.metrics&.increment(
76
- 'handler',
77
- tags: %W(
78
- status:batch_error
79
- topic:#{metadata[:topic]}
80
- ))
81
- Deimos.config.logger.warn(
82
- message: 'Error consuming message batch',
83
- handler: self.class.name,
84
- metadata: metadata.except(:keys),
85
- message_ids: _payload_identifiers(payloads, metadata),
86
- error_message: exception.message,
87
- error: exception.backtrace
88
- )
89
- super
90
- end
91
-
92
- # @param time_taken [Float]
93
- # @param payloads [Array<Hash>]
94
- # @param metadata [Hash]
95
- def _handle_success(time_taken, payloads, metadata)
96
- Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
97
- time:consume_batch
98
- topic:#{metadata[:topic]}
99
- ))
100
- Deimos.config.metrics&.increment(
101
- 'handler',
102
- tags: %W(
103
- status:batch_success
104
- topic:#{metadata[:topic]}
105
- ))
106
- Deimos.config.metrics&.increment(
107
- 'handler',
108
- by: metadata['batch_size'],
109
- tags: %W(
110
- status:success
111
- topic:#{metadata[:topic]}
112
- ))
113
- Deimos.config.logger.info(
114
- message: 'Finished processing Kafka batch event',
115
- message_ids: _payload_identifiers(payloads, metadata),
116
- time_elapsed: time_taken,
117
- metadata: metadata.except(:keys)
118
- )
119
- end
120
-
121
- # Get payload identifiers (key and message_id if present) for logging.
122
- # @param payloads [Array<Hash>]
123
- # @param metadata [Hash]
124
- # @return [Hash] the identifiers.
125
- def _payload_identifiers(payloads, metadata)
126
- message_ids = payloads&.map do |payload|
127
- if payload.is_a?(Hash) && payload.key?('message_id')
128
- payload['message_id']
129
- end
130
- end
131
-
132
- # Payloads may be nil if preprocessing failed
133
- messages = payloads || metadata[:keys] || []
134
-
135
- messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id|
136
- ids = {}
137
-
138
- ids[:key] = k if k.present?
139
- ids[:message_id] = m_id if m_id.present?
140
-
141
- ids
142
- end
143
- end
4
+ # @deprecated Use Deimos::Consumer with `delivery: inline_batch` configured instead
5
+ class BatchConsumer < Consumer
144
6
  end
145
7
  end
@@ -47,17 +47,15 @@ module Deimos
47
47
  handler_class = listener.handler.constantize
48
48
  delivery = listener.delivery
49
49
 
50
- # Validate that Deimos consumers use proper delivery configs
51
- if handler_class < Deimos::BatchConsumer
52
- unless delivery == 'inline_batch'
53
- raise "BatchConsumer #{listener.handler} must have delivery set to"\
54
- ' `inline_batch`'
55
- end
56
- elsif handler_class < Deimos::Consumer
57
- if delivery.present? && !%w(message batch).include?(delivery)
58
- raise "Non-batch Consumer #{listener.handler} must have delivery"\
59
- ' set to `message` or `batch`'
50
+ next unless handler_class < Deimos::Consumer
51
+
52
+ # Validate that each consumer implements the correct method for its type
53
+ if delivery == 'inline_batch'
54
+ if handler_class.instance_method(:consume_batch).owner == Deimos::Consume::BatchConsumption
55
+ raise "BatchConsumer #{listener.handler} does not implement `consume_batch`"
60
56
  end
57
+ elsif handler_class.instance_method(:consume).owner == Deimos::Consume::MessageConsumption
58
+ raise "Non-batch Consumer #{listener.handler} does not implement `consume`"
61
59
  end
62
60
  end
63
61
  end
@@ -340,6 +338,26 @@ module Deimos
340
338
  setting :heartbeat_interval
341
339
  end
342
340
 
341
+ setting_object :db_poller do
342
+ # Producer class to use for the poller.
343
+ setting :producer_class
344
+ # How often to run the poller, in seconds. If the poll takes longer than this
345
+ # time, it will run again immediately and the timeout
346
+ # will be pushed to the next interval, e.g. 1 minute.
347
+ setting :run_every, 60
348
+ # Column to use to find updates. Must have an index on it.
349
+ setting :timestamp_column, :updated_at
350
+ # Amount of time, in seconds, to wait before catching updates, to allow transactions
351
+ # to complete but still pick up the right records.
352
+ setting :delay_time, 2
353
+ # If true, dump the full table rather than incremental changes. Should
354
+ # only be used for very small tables.
355
+ setting :full_table, false
356
+ # If false, start from the current time instead of the beginning of time
357
+ # if this is the first time running the poller.
358
+ setting :start_from_beginning, true
359
+ end
360
+
343
361
  deprecate 'kafka_logger', 'kafka.logger'
344
362
  deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
345
363
  deprecate 'schema_registry_url', 'schema.registry_url'
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Consume
5
+ # Helper methods used by batch consumers, i.e. those with "inline_batch"
6
+ # delivery. Payloads are decoded then consumers are invoked with arrays
7
+ # of messages to be handled at once.
8
+ module BatchConsumption
9
+ extend ActiveSupport::Concern
10
+ include Phobos::BatchHandler
11
+
12
+ # :nodoc:
13
+ def around_consume_batch(batch, metadata)
14
+ payloads = []
15
+ benchmark = Benchmark.measure do
16
+ if self.class.config[:key_configured]
17
+ metadata[:keys] = batch.map do |message|
18
+ decode_key(message.key)
19
+ end
20
+ end
21
+ metadata[:first_offset] = batch.first&.offset
22
+
23
+ payloads = batch.map do |message|
24
+ message.payload ? self.class.decoder.decode(message.payload) : nil
25
+ end
26
+ _received_batch(payloads, metadata)
27
+ _with_span do
28
+ yield payloads, metadata
29
+ end
30
+ end
31
+ _handle_batch_success(benchmark.real, payloads, metadata)
32
+ rescue StandardError => e
33
+ _handle_batch_error(e, payloads, metadata)
34
+ end
35
+
36
+ # Consume a batch of incoming messages.
37
+ # @param _payloads [Array<Phobos::BatchMessage>]
38
+ # @param _metadata [Hash]
39
+ def consume_batch(_payloads, _metadata)
40
+ raise NotImplementedError
41
+ end
42
+
43
+ protected
44
+
45
+ def _received_batch(payloads, metadata)
46
+ Deimos.config.logger.info(
47
+ message: 'Got Kafka batch event',
48
+ message_ids: _payload_identifiers(payloads, metadata),
49
+ metadata: metadata.except(:keys)
50
+ )
51
+ Deimos.config.logger.debug(
52
+ message: 'Kafka batch event payloads',
53
+ payloads: payloads
54
+ )
55
+ Deimos.config.metrics&.increment(
56
+ 'handler',
57
+ tags: %W(
58
+ status:batch_received
59
+ topic:#{metadata[:topic]}
60
+ ))
61
+ Deimos.config.metrics&.increment(
62
+ 'handler',
63
+ by: metadata['batch_size'],
64
+ tags: %W(
65
+ status:received
66
+ topic:#{metadata[:topic]}
67
+ ))
68
+ if payloads.present?
69
+ payloads.each { |payload| _report_time_delayed(payload, metadata) }
70
+ end
71
+ end
72
+
73
+ # @param exception [Throwable]
74
+ # @param payloads [Array<Hash>]
75
+ # @param metadata [Hash]
76
+ def _handle_batch_error(exception, payloads, metadata)
77
+ Deimos.config.metrics&.increment(
78
+ 'handler',
79
+ tags: %W(
80
+ status:batch_error
81
+ topic:#{metadata[:topic]}
82
+ ))
83
+ Deimos.config.logger.warn(
84
+ message: 'Error consuming message batch',
85
+ handler: self.class.name,
86
+ metadata: metadata.except(:keys),
87
+ message_ids: _payload_identifiers(payloads, metadata),
88
+ error_message: exception.message,
89
+ error: exception.backtrace
90
+ )
91
+ _error(exception, payloads, metadata)
92
+ end
93
+
94
+ # @param time_taken [Float]
95
+ # @param payloads [Array<Hash>]
96
+ # @param metadata [Hash]
97
+ def _handle_batch_success(time_taken, payloads, metadata)
98
+ Deimos.config.metrics&.histogram('handler',
99
+ time_taken,
100
+ tags: %W(
101
+ time:consume_batch
102
+ topic:#{metadata[:topic]}
103
+ ))
104
+ Deimos.config.metrics&.increment(
105
+ 'handler',
106
+ tags: %W(
107
+ status:batch_success
108
+ topic:#{metadata[:topic]}
109
+ ))
110
+ Deimos.config.metrics&.increment(
111
+ 'handler',
112
+ by: metadata['batch_size'],
113
+ tags: %W(
114
+ status:success
115
+ topic:#{metadata[:topic]}
116
+ ))
117
+ Deimos.config.logger.info(
118
+ message: 'Finished processing Kafka batch event',
119
+ message_ids: _payload_identifiers(payloads, metadata),
120
+ time_elapsed: time_taken,
121
+ metadata: metadata.except(:keys)
122
+ )
123
+ end
124
+
125
+ # Get payload identifiers (key and message_id if present) for logging.
126
+ # @param payloads [Array<Hash>]
127
+ # @param metadata [Hash]
128
+ # @return [Array<Hash>] the identifiers.
129
+ def _payload_identifiers(payloads, metadata)
130
+ message_ids = payloads&.map do |payload|
131
+ if payload.is_a?(Hash) && payload.key?('message_id')
132
+ payload['message_id']
133
+ end
134
+ end
135
+
136
+ # Payloads may be nil if preprocessing failed
137
+ messages = payloads || metadata[:keys] || []
138
+
139
+ messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id|
140
+ ids = {}
141
+
142
+ ids[:key] = k if k.present?
143
+ ids[:message_id] = m_id if m_id.present?
144
+
145
+ ids
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ module Consume
5
+ # Methods used by message-by-message (non-batch) consumers. These consumers
6
+ # are invoked for every individual message.
7
+ module MessageConsumption
8
+ extend ActiveSupport::Concern
9
+ include Phobos::Handler
10
+
11
+ # :nodoc:
12
+ def around_consume(payload, metadata)
13
+ decoded_payload = payload.dup
14
+ new_metadata = metadata.dup
15
+ benchmark = Benchmark.measure do
16
+ _with_span do
17
+ new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
18
+ decoded_payload = payload ? self.class.decoder.decode(payload) : nil
19
+ _received_message(decoded_payload, new_metadata)
20
+ yield decoded_payload, new_metadata
21
+ end
22
+ end
23
+ _handle_success(benchmark.real, decoded_payload, new_metadata)
24
+ rescue StandardError => e
25
+ _handle_error(e, decoded_payload, new_metadata)
26
+ end
27
+
28
+ # Consume incoming messages.
29
+ # @param _payload [String]
30
+ # @param _metadata [Hash]
31
+ def consume(_payload, _metadata)
32
+ raise NotImplementedError
33
+ end
34
+
35
+ private
36
+
37
+ def _received_message(payload, metadata)
38
+ Deimos.config.logger.info(
39
+ message: 'Got Kafka event',
40
+ payload: payload,
41
+ metadata: metadata
42
+ )
43
+ Deimos.config.metrics&.increment('handler', tags: %W(
44
+ status:received
45
+ topic:#{metadata[:topic]}
46
+ ))
47
+ _report_time_delayed(payload, metadata)
48
+ end
49
+
50
+ # @param exception [Throwable]
51
+ # @param payload [Hash]
52
+ # @param metadata [Hash]
53
+ def _handle_error(exception, payload, metadata)
54
+ Deimos.config.metrics&.increment(
55
+ 'handler',
56
+ tags: %W(
57
+ status:error
58
+ topic:#{metadata[:topic]}
59
+ )
60
+ )
61
+ Deimos.config.logger.warn(
62
+ message: 'Error consuming message',
63
+ handler: self.class.name,
64
+ metadata: metadata,
65
+ data: payload,
66
+ error_message: exception.message,
67
+ error: exception.backtrace
68
+ )
69
+
70
+ _error(exception, payload, metadata)
71
+ end
72
+
73
+ # @param time_taken [Float]
74
+ # @param payload [Hash]
75
+ # @param metadata [Hash]
76
+ def _handle_success(time_taken, payload, metadata)
77
+ Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
78
+ time:consume
79
+ topic:#{metadata[:topic]}
80
+ ))
81
+ Deimos.config.metrics&.increment('handler', tags: %W(
82
+ status:success
83
+ topic:#{metadata[:topic]}
84
+ ))
85
+ Deimos.config.logger.info(
86
+ message: 'Finished processing Kafka event',
87
+ payload: payload,
88
+ time_elapsed: time_taken,
89
+ metadata: metadata
90
+ )
91
+ end
92
+ end
93
+ end
94
+ end