deimos-ruby 1.6.2 → 1.8.0.pre.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +9 -0
- data/.rubocop.yml +15 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +31 -0
- data/Gemfile.lock +43 -36
- data/README.md +141 -16
- data/Rakefile +1 -1
- data/deimos-ruby.gemspec +2 -1
- data/docs/ARCHITECTURE.md +144 -0
- data/docs/CONFIGURATION.md +27 -0
- data/lib/deimos.rb +7 -6
- data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +33 -75
- data/lib/deimos/active_record_producer.rb +23 -0
- data/lib/deimos/batch_consumer.rb +2 -140
- data/lib/deimos/config/configuration.rb +28 -10
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +79 -69
- data/lib/deimos/kafka_message.rb +1 -1
- data/lib/deimos/kafka_topic_info.rb +1 -1
- data/lib/deimos/message.rb +6 -1
- data/lib/deimos/metrics/provider.rb +0 -2
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/tracing/provider.rb +0 -2
- data/lib/deimos/utils/db_poller.rb +149 -0
- data/lib/deimos/utils/db_producer.rb +8 -3
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/lag_reporter.rb +19 -26
- data/lib/deimos/version.rb +1 -1
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +7 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +3 -11
- data/spec/active_record_producer_spec.rb +66 -88
- data/spec/batch_consumer_spec.rb +24 -7
- data/spec/config/configuration_spec.rb +4 -0
- data/spec/consumer_spec.rb +8 -8
- data/spec/deimos_spec.rb +57 -49
- data/spec/handlers/my_batch_consumer.rb +6 -1
- data/spec/handlers/my_consumer.rb +6 -1
- data/spec/message_spec.rb +19 -0
- data/spec/producer_spec.rb +3 -3
- data/spec/rake_spec.rb +1 -1
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/spec_helper.rb +61 -6
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/lag_reporter_spec.rb +29 -22
- metadata +55 -20
- data/lib/deimos/base_consumer.rb +0 -104
- data/lib/deimos/utils/executor.rb +0 -124
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +0 -68
- data/spec/utils/executor_spec.rb +0 -53
- data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Deimos
|
4
|
+
module Consume
|
5
|
+
# Methods used by message-by-message (non-batch) consumers. These consumers
|
6
|
+
# are invoked for every individual message.
|
7
|
+
module MessageConsumption
  extend ActiveSupport::Concern
  include Phobos::Handler

  # Wraps the handling of one message. Decodes the key (only when the
  # consumer class has a key config) and the payload, reports the received
  # message, then yields the decoded payload and metadata to the given block.
  # Success is reported with the measured wall-clock time; any StandardError
  # is routed to _handle_error together with whatever had been decoded so far.
  # @param payload [String, nil]
  # @param metadata [Hash]
  def around_consume(payload, metadata)
    decoded_payload = payload.dup
    new_metadata = metadata.dup
    timing = Benchmark.measure do
      _with_span do
        if self.class.config[:key_configured]
          new_metadata[:key] = decode_key(metadata[:key])
        end
        # A falsy payload is never handed to the decoder; it becomes nil.
        decoded_payload = if payload
                            self.class.decoder.decode(payload)
                          end
        _received_message(decoded_payload, new_metadata)
        yield decoded_payload, new_metadata
      end
    end
    _handle_success(timing.real, decoded_payload, new_metadata)
  rescue StandardError => error
    _handle_error(error, decoded_payload, new_metadata)
  end

  # Consume an incoming message. The default implementation only raises.
  # @param _payload [String]
  # @param _metadata [Hash]
  # @raise [NotImplementedError] always
  def consume(_payload, _metadata)
    raise NotImplementedError
  end

  private

  # Logs the incoming event, bumps the "received" counter and reports how
  # delayed the message is.
  # @param payload [Hash]
  # @param metadata [Hash]
  def _received_message(payload, metadata)
    Deimos.config.logger.info(message: 'Got Kafka event',
                              payload: payload,
                              metadata: metadata)
    Deimos.config.metrics&.increment(
      'handler',
      tags: ['status:received', "topic:#{metadata[:topic]}"]
    )
    _report_time_delayed(payload, metadata)
  end

  # Bumps the "error" counter, logs a warning with the exception details and
  # then delegates to _error.
  # @param exception [Throwable]
  # @param payload [Hash]
  # @param metadata [Hash]
  def _handle_error(exception, payload, metadata)
    Deimos.config.metrics&.increment(
      'handler',
      tags: ['status:error', "topic:#{metadata[:topic]}"]
    )
    Deimos.config.logger.warn(message: 'Error consuming message',
                              handler: self.class.name,
                              metadata: metadata,
                              data: payload,
                              error_message: exception.message,
                              error: exception.backtrace)

    _error(exception, payload, metadata)
  end

  # Records the consume-time histogram, bumps the "success" counter and logs
  # that processing finished.
  # @param time_taken [Float]
  # @param payload [Hash]
  # @param metadata [Hash]
  def _handle_success(time_taken, payload, metadata)
    Deimos.config.metrics&.histogram(
      'handler',
      time_taken,
      tags: ['time:consume', "topic:#{metadata[:topic]}"]
    )
    Deimos.config.metrics&.increment(
      'handler',
      tags: ['status:success', "topic:#{metadata[:topic]}"]
    )
    Deimos.config.logger.info(message: 'Finished processing Kafka event',
                              payload: payload,
                              time_elapsed: time_taken,
                              metadata: metadata)
  end
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/deimos/consumer.rb
CHANGED
@@ -1,94 +1,104 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'deimos/
|
4
|
-
require 'deimos/
|
5
|
-
require 'phobos/handler'
|
6
|
-
require 'active_support/all'
|
3
|
+
require 'deimos/consume/batch_consumption'
|
4
|
+
require 'deimos/consume/message_consumption'
|
7
5
|
|
8
|
-
# Class to consume messages coming from
|
6
|
+
# Class to consume messages coming from a Kafka topic
|
9
7
|
# Note: According to the docs, instances of your handler will be created
|
10
|
-
# for every incoming message. This class should be lightweight.
|
8
|
+
# for every incoming message/batch. This class should be lightweight.
|
11
9
|
module Deimos
|
12
|
-
#
|
13
|
-
|
14
|
-
|
10
|
+
# Basic consumer class. Inherit from this class and override either consume
|
11
|
+
# or consume_batch, depending on the delivery mode of your listener.
|
12
|
+
# `consume` -> use `delivery :message` or `delivery :batch`
|
13
|
+
# `consume_batch` -> use `delivery :inline_batch`
|
14
|
+
class Consumer
|
15
|
+
include Consume::MessageConsumption
|
16
|
+
include Consume::BatchConsumption
|
17
|
+
include SharedConfig
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
19
|
+
class << self
|
20
|
+
# @return [Deimos::SchemaBackends::Base]
|
21
|
+
def decoder
|
22
|
+
@decoder ||= Deimos.schema_backend(schema: config[:schema],
|
23
|
+
namespace: config[:namespace])
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [Deimos::SchemaBackends::Base]
|
27
|
+
def key_decoder
|
28
|
+
@key_decoder ||= Deimos.schema_backend(schema: config[:key_schema],
|
29
|
+
namespace: config[:namespace])
|
26
30
|
end
|
27
|
-
_handle_success(benchmark.real, payload, metadata)
|
28
31
|
end
|
29
32
|
|
30
|
-
#
|
31
|
-
# @param
|
32
|
-
# @
|
33
|
-
def
|
34
|
-
|
33
|
+
# Helper method to decode an encoded key.
|
34
|
+
# @param key [String]
|
35
|
+
# @return [Object] the decoded key.
|
36
|
+
def decode_key(key)
|
37
|
+
return nil if key.nil?
|
38
|
+
|
39
|
+
config = self.class.config
|
40
|
+
unless config[:key_configured]
|
41
|
+
raise 'No key config given - if you are not decoding keys, please use '\
|
42
|
+
'`key_config plain: true`'
|
43
|
+
end
|
44
|
+
|
45
|
+
if config[:key_field]
|
46
|
+
self.class.decoder.decode_key(key, config[:key_field])
|
47
|
+
elsif config[:key_schema]
|
48
|
+
self.class.key_decoder.decode(key, schema: config[:key_schema])
|
49
|
+
else # no encoding
|
50
|
+
key
|
51
|
+
end
|
35
52
|
end
|
36
53
|
|
37
54
|
private
|
38
55
|
|
39
|
-
def
|
40
|
-
Deimos.config.
|
41
|
-
|
42
|
-
|
43
|
-
metadata: metadata
|
56
|
+
def _with_span
|
57
|
+
@span = Deimos.config.tracer&.start(
|
58
|
+
'deimos-consumer',
|
59
|
+
resource: self.class.name.gsub('::', '-')
|
44
60
|
)
|
45
|
-
|
46
|
-
|
61
|
+
yield
|
62
|
+
ensure
|
63
|
+
Deimos.config.tracer&.finish(@span)
|
64
|
+
end
|
65
|
+
|
66
|
+
def _report_time_delayed(payload, metadata)
|
67
|
+
return if payload.nil? || payload['timestamp'].blank?
|
68
|
+
|
69
|
+
begin
|
70
|
+
time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
|
71
|
+
rescue ArgumentError
|
72
|
+
Deimos.config.logger.info(
|
73
|
+
message: "Error parsing timestamp! #{payload['timestamp']}"
|
74
|
+
)
|
75
|
+
return
|
76
|
+
end
|
77
|
+
Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
|
78
|
+
time:time_delayed
|
47
79
|
topic:#{metadata[:topic]}
|
48
80
|
))
|
49
|
-
_report_time_delayed(payload, metadata)
|
50
81
|
end
|
51
82
|
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# @param
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
topic:#{metadata[:topic]}
|
61
|
-
)
|
62
|
-
)
|
63
|
-
Deimos.config.logger.warn(
|
64
|
-
message: 'Error consuming message',
|
65
|
-
handler: self.class.name,
|
66
|
-
metadata: metadata,
|
67
|
-
data: payload,
|
68
|
-
error_message: exception.message,
|
69
|
-
error: exception.backtrace
|
70
|
-
)
|
71
|
-
super
|
83
|
+
# Overrideable method to determine if a given error should be considered
|
84
|
+
# "fatal" and always be reraised.
|
85
|
+
# @param _error [Exception]
|
86
|
+
# @param _payload [Hash]
|
87
|
+
# @param _metadata [Hash]
|
88
|
+
# @return [Boolean]
|
89
|
+
def fatal_error?(_error, _payload, _metadata)
|
90
|
+
false
|
72
91
|
end
|
73
92
|
|
74
|
-
# @param
|
93
|
+
# @param exception [Exception]
|
75
94
|
# @param payload [Hash]
|
76
95
|
# @param metadata [Hash]
|
77
|
-
def
|
78
|
-
Deimos.config.
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
status:success
|
84
|
-
topic:#{metadata[:topic]}
|
85
|
-
))
|
86
|
-
Deimos.config.logger.info(
|
87
|
-
message: 'Finished processing Kafka event',
|
88
|
-
payload: payload,
|
89
|
-
time_elapsed: time_taken,
|
90
|
-
metadata: metadata
|
91
|
-
)
|
96
|
+
def _error(exception, payload, metadata)
|
97
|
+
Deimos.config.tracer&.set_error(@span, exception)
|
98
|
+
|
99
|
+
raise if Deimos.config.consumers.reraise_errors ||
|
100
|
+
Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) ||
|
101
|
+
fatal_error?(exception, payload, metadata)
|
92
102
|
end
|
93
103
|
end
|
94
104
|
end
|
data/lib/deimos/kafka_message.rb
CHANGED
data/lib/deimos/message.rb
CHANGED
@@ -10,7 +10,7 @@ module Deimos
|
|
10
10
|
# @param producer [Class]
|
11
11
|
def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
|
12
12
|
@payload = payload&.with_indifferent_access
|
13
|
-
@producer_name = producer
|
13
|
+
@producer_name = producer&.name
|
14
14
|
@topic = topic
|
15
15
|
@key = key
|
16
16
|
@partition_key = partition_key
|
@@ -70,5 +70,10 @@ module Deimos
|
|
70
70
|
def ==(other)
|
71
71
|
self.to_h == other.to_h
|
72
72
|
end
|
73
|
+
|
74
|
+
# @return [Boolean] True if this message is a tombstone
|
75
|
+
def tombstone?
|
76
|
+
payload.nil?
|
77
|
+
end
|
73
78
|
end
|
74
79
|
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/poll_info'
|
4
|
+
require 'sigurd/executor'
|
5
|
+
require 'sigurd/signal_handler'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module Utils
|
9
|
+
# Class which continually polls the database and sends Kafka messages.
|
10
|
+
class DbPoller
  # Maximum number of records requested from the database per query.
  BATCH_SIZE = 1000

  # Needed for Executor so it can identify the worker
  attr_reader :id

  # Begin the DB Poller process: builds one poller per configured
  # db_poller object and runs them all under a Sigurd executor.
  # @raise [RuntimeError] if no pollers are configured.
  def self.start!
    if Deimos.config.db_poller_objects.empty?
      raise('No pollers configured!')
    end

    pollers = Deimos.config.db_poller_objects.map do |poller_config|
      self.new(poller_config)
    end
    executor = Sigurd::Executor.new(pollers,
                                    sleep_seconds: 5,
                                    logger: Deimos.config.logger)
    signal_handler = Sigurd::SignalHandler.new(executor)
    signal_handler.run!
  end

  # @param config [Deimos::Configuration::ConfigStruct]
  # @raise [RuntimeError] if the configured producer class cannot be found
  #   or does not inherit from Deimos::ActiveRecordProducer.
  def initialize(config)
    @config = config
    @id = SecureRandom.hex
    begin
      @producer = @config.producer_class.constantize
    rescue NameError
      raise "Class #{@config.producer_class} not found!"
    end
    unless @producer < Deimos::ActiveRecordProducer
      # @producer is itself a class, so report its own name; `.class.name`
      # would always produce the useless string "Class".
      raise "Class #{@producer.name} is not an ActiveRecordProducer!"
    end
  end

  # Start the poll:
  # 1) Grab the current PollInfo from the database indicating the last
  # time we ran
  # 2) On a loop, process all the recent updates between the last time
  # we ran and now.
  # The loop exits once #stop has been called.
  def start
    # Don't send asynchronously
    if Deimos.config.producers.backend == :kafka_async
      Deimos.config.producers.backend = :kafka
    end
    Deimos.config.logger.info('Starting...')
    @signal_to_stop = false
    retrieve_poll_info
    loop do
      if @signal_to_stop
        Deimos.config.logger.info('Shutting down')
        break
      end
      process_updates
      sleep 0.1
    end
  end

  # Grab the PollInfo or create if it doesn't exist. A newly created row
  # starts at Time.new(0) when start_from_beginning is set, otherwise at
  # the current time.
  def retrieve_poll_info
    ActiveRecord::Base.connection.reconnect!
    new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
    @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
            Deimos::PollInfo.create!(producer: @config.producer_class,
                                     last_sent: new_time,
                                     last_sent_id: 0)
  end

  # Stop the poll. Only sets a flag; the loop in #start checks it on its
  # next iteration.
  def stop
    Deimos.config.logger.info('Received signal to stop')
    @signal_to_stop = true
  end

  # Indicate whether this current loop should process updates. Most loops
  # will busy-wait (sleeping 0.1 seconds) until it's ready.
  # @return [Boolean]
  def should_run?
    Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
  end

  # Reads the configured timestamp column from the record.
  # @param record [ActiveRecord::Base]
  # @return [ActiveSupport::TimeWithZone]
  def last_updated(record)
    record.public_send(@config.timestamp_column)
  end

  # Send messages for updated data. Fetches and sends batches until a
  # query comes back empty, then logs a summary of the poll.
  def process_updates
    return unless should_run?

    time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
    time_to = Time.zone.now - @config.delay_time
    Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
    message_count = 0
    batch_count = 0

    # poll_query gets all the relevant data from the database, as defined
    # by the producer itself.
    loop do
      Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
      batch = fetch_results(time_from, time_to).to_a
      break if batch.empty?

      batch_count += 1
      process_batch(batch)
      message_count += batch.size
      # The next query resumes from the newest record just sent.
      time_from = last_updated(batch.last)
    end
    Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
  end

  # Builds the query for the next batch: the producer's poll_query, capped
  # at BATCH_SIZE rows and ordered by timestamp column, then primary key.
  # @param time_from [ActiveSupport::TimeWithZone]
  # @param time_to [ActiveSupport::TimeWithZone]
  # @return [ActiveRecord::Relation]
  def fetch_results(time_from, time_to)
    id = @producer.config[:record_class].primary_key
    quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
    quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
    @producer.poll_query(time_from: time_from,
                         time_to: time_to,
                         column_name: @config.timestamp_column,
                         min_id: @info.last_sent_id).
      limit(BATCH_SIZE).
      order("#{quoted_timestamp}, #{quoted_id}")
  end

  # Sends the batch through the producer, then records the timestamp and
  # primary key of its last record on the PollInfo row.
  # @param batch [Array<ActiveRecord::Base>]
  def process_batch(batch)
    record = batch.last
    id_method = record.class.primary_key
    last_id = record.public_send(id_method)
    last_updated_at = last_updated(record)
    @producer.send_events(batch)
    # NOTE(review): update_attributes! is deprecated in Rails 6.0 and removed
    # in 6.1 - confirm the supported Rails range before switching to update!.
    @info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
  end
end
|
148
|
+
end
|
149
|
+
end
|