deimos-temp-fork 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +83 -0
- data/.gitignore +41 -0
- data/.gitmodules +0 -0
- data/.rspec +1 -0
- data/.rubocop.yml +333 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +349 -0
- data/CODE_OF_CONDUCT.md +77 -0
- data/Dockerfile +23 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +286 -0
- data/Guardfile +22 -0
- data/LICENSE.md +195 -0
- data/README.md +1099 -0
- data/Rakefile +13 -0
- data/bin/deimos +4 -0
- data/deimos-ruby.gemspec +44 -0
- data/docker-compose.yml +71 -0
- data/docs/ARCHITECTURE.md +140 -0
- data/docs/CONFIGURATION.md +236 -0
- data/docs/DATABASE_BACKEND.md +147 -0
- data/docs/INTEGRATION_TESTS.md +52 -0
- data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
- data/docs/UPGRADING.md +128 -0
- data/lib/deimos-temp-fork.rb +95 -0
- data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +67 -0
- data/lib/deimos/active_record_producer.rb +87 -0
- data/lib/deimos/backends/base.rb +32 -0
- data/lib/deimos/backends/db.rb +41 -0
- data/lib/deimos/backends/kafka.rb +33 -0
- data/lib/deimos/backends/kafka_async.rb +33 -0
- data/lib/deimos/backends/test.rb +20 -0
- data/lib/deimos/batch_consumer.rb +7 -0
- data/lib/deimos/config/configuration.rb +381 -0
- data/lib/deimos/config/phobos_config.rb +137 -0
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +104 -0
- data/lib/deimos/instrumentation.rb +76 -0
- data/lib/deimos/kafka_message.rb +60 -0
- data/lib/deimos/kafka_source.rb +128 -0
- data/lib/deimos/kafka_topic_info.rb +102 -0
- data/lib/deimos/message.rb +79 -0
- data/lib/deimos/metrics/datadog.rb +47 -0
- data/lib/deimos/metrics/mock.rb +39 -0
- data/lib/deimos/metrics/provider.rb +36 -0
- data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
- data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/producer.rb +224 -0
- data/lib/deimos/railtie.rb +8 -0
- data/lib/deimos/schema_backends/avro_base.rb +140 -0
- data/lib/deimos/schema_backends/avro_local.rb +30 -0
- data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
- data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
- data/lib/deimos/schema_backends/avro_validation.rb +21 -0
- data/lib/deimos/schema_backends/base.rb +150 -0
- data/lib/deimos/schema_backends/mock.rb +42 -0
- data/lib/deimos/shared_config.rb +63 -0
- data/lib/deimos/test_helpers.rb +360 -0
- data/lib/deimos/tracing/datadog.rb +35 -0
- data/lib/deimos/tracing/mock.rb +40 -0
- data/lib/deimos/tracing/provider.rb +29 -0
- data/lib/deimos/utils/db_poller.rb +150 -0
- data/lib/deimos/utils/db_producer.rb +243 -0
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/inline_consumer.rb +150 -0
- data/lib/deimos/utils/lag_reporter.rb +175 -0
- data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
- data/lib/deimos/version.rb +5 -0
- data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
- data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
- data/lib/generators/deimos/active_record_generator.rb +79 -0
- data/lib/generators/deimos/db_backend/templates/migration +25 -0
- data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
- data/lib/generators/deimos/db_backend_generator.rb +48 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +34 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +154 -0
- data/spec/active_record_producer_spec.rb +85 -0
- data/spec/backends/base_spec.rb +10 -0
- data/spec/backends/db_spec.rb +54 -0
- data/spec/backends/kafka_async_spec.rb +11 -0
- data/spec/backends/kafka_spec.rb +11 -0
- data/spec/batch_consumer_spec.rb +256 -0
- data/spec/config/configuration_spec.rb +248 -0
- data/spec/consumer_spec.rb +209 -0
- data/spec/deimos_spec.rb +169 -0
- data/spec/generators/active_record_generator_spec.rb +56 -0
- data/spec/handlers/my_batch_consumer.rb +10 -0
- data/spec/handlers/my_consumer.rb +10 -0
- data/spec/kafka_listener_spec.rb +55 -0
- data/spec/kafka_source_spec.rb +381 -0
- data/spec/kafka_topic_info_spec.rb +111 -0
- data/spec/message_spec.rb +19 -0
- data/spec/phobos.bad_db.yml +73 -0
- data/spec/phobos.yml +77 -0
- data/spec/producer_spec.rb +498 -0
- data/spec/rake_spec.rb +19 -0
- data/spec/schema_backends/avro_base_shared.rb +199 -0
- data/spec/schema_backends/avro_local_spec.rb +32 -0
- data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
- data/spec/schema_backends/avro_validation_spec.rb +24 -0
- data/spec/schema_backends/base_spec.rb +33 -0
- data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
- data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
- data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
- data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
- data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
- data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
- data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
- data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
- data/spec/spec_helper.rb +267 -0
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/db_producer_spec.rb +514 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/inline_consumer_spec.rb +31 -0
- data/spec/utils/lag_reporter_spec.rb +76 -0
- data/spec/utils/platform_schema_validation_spec.rb +0 -0
- data/spec/utils/schema_controller_mixin_spec.rb +84 -0
- data/support/deimos-solo.png +0 -0
- data/support/deimos-with-name-next.png +0 -0
- data/support/deimos-with-name.png +0 -0
- data/support/flipp-logo.png +0 -0
- metadata +551 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'deimos/tracing/provider'

module Deimos
  module Tracing
    # Datadog implementation of the tracing provider interface.
    class Datadog < Tracing::Provider
      # @param config [Hash] tracing configuration; must contain :service_name,
      #   which is attached to every span this provider opens.
      def initialize(config)
        @service = config[:service_name]
        raise 'Tracing config must specify service_name' if @service.nil?
      end

      # Open a new Datadog trace span and tag it with our service name.
      # @param span_name [String] name of the span/trace
      # @param options [Hash] :resource, when given, is set on the span
      # @return [Object] the Datadog span object
      def start(span_name, options={})
        ::Datadog.tracer.trace(span_name).tap do |span|
          span.service = @service
          span.resource = options[:resource]
        end
      end

      # Close out the given span.
      # @param span [Object] span returned from #start
      def finish(span)
        span.finish
      end

      # Record an exception against the span.
      # @param span [Object] span returned from #start
      # @param exception [Exception] the error to attach
      def set_error(span, exception)
        span.set_error(exception)
      end
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'deimos/tracing/provider'

module Deimos
  module Tracing
    # No-op tracing provider that only logs span activity. A "span" here is
    # just a Hash carrying the name and start time.
    class Mock < Tracing::Provider
      # @param logger [Logger, nil] destination for log lines; defaults to STDOUT.
      def initialize(logger=nil)
        @logger = logger || Logger.new(STDOUT)
        @logger.info('MockTracingProvider initialized')
      end

      # "Start" a span: log it and return a Hash standing in for a real span.
      # @param span_name [String] name of the span
      # @return [Hash] keys :name and :started_at
      def start(span_name, _options={})
        @logger.info("Mock span '#{span_name}' started")
        { name: span_name, started_at: Time.zone.now }
      end

      # Log the span's start/finish window.
      # @param span [Hash] value returned from #start
      def finish(span)
        finished_at = Time.zone.now
        @logger.info("Mock span '#{span[:name]}' finished: #{span[:started_at]} to #{finished_at}")
      end

      # Stash the exception on the span Hash and log it.
      # @param span [Hash] value returned from #start
      # @param exception [Exception] the error to record
      def set_error(span, exception)
        span[:exception] = exception
        @logger.info("Mock span '#{span[:name]}' set an error: #{exception}")
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Deimos
  module Tracing
    # Abstract interface that every concrete tracing provider must implement.
    # All methods raise NotImplementedError until overridden by a subclass.
    class Provider
      # Start a trace and return an opaque span handle.
      # @param _span_name [String] The name of the span/trace
      # @param _options [Hash] Provider-specific options for the span
      # @return [Object] The span object
      def start(_span_name, _options={})
        raise NotImplementedError
      end

      # Complete the trace represented by the span.
      # @param _span [Object] The span to finish trace on
      def finish(_span)
        raise NotImplementedError
      end

      # Attach an error to the span.
      # @param _span [Object] The span to set error on
      # @param _exception [Exception] The exception that occurred
      def set_error(_span, _exception)
        raise NotImplementedError
      end
    end
  end
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'deimos/poll_info'
require 'sigurd/executor'
require 'sigurd/signal_handler'

module Deimos
  module Utils
    # Class which continually polls the database and sends Kafka messages.
    class DbPoller
      # Number of records fetched per poll query.
      BATCH_SIZE = 1000

      # Needed for Executor so it can identify the worker
      attr_reader :id

      # Begin the DB Poller process: build one poller per configured object
      # and run them all under a signal-aware executor.
      # @raise [RuntimeError] if no pollers are configured.
      def self.start!
        if Deimos.config.db_poller_objects.empty?
          raise('No pollers configured!')
        end

        pollers = Deimos.config.db_poller_objects.map do |poller_config|
          self.new(poller_config)
        end
        executor = Sigurd::Executor.new(pollers,
                                        sleep_seconds: 5,
                                        logger: Deimos.config.logger)
        signal_handler = Sigurd::SignalHandler.new(executor)
        signal_handler.run!
      end

      # @param config [Deimos::Configuration::ConfigStruct]
      # @raise [RuntimeError] if the configured producer class does not exist
      #   or is not an ActiveRecordProducer.
      def initialize(config)
        @config = config
        @id = SecureRandom.hex
        begin
          @producer = @config.producer_class.constantize
        rescue NameError
          raise "Class #{@config.producer_class} not found!"
        end
        unless @producer < Deimos::ActiveRecordProducer
          # NOTE: @producer is itself a Class, so interpolate @producer.name —
          # the original `@producer.class.name` always printed "Class".
          raise "Class #{@producer.name} is not an ActiveRecordProducer!"
        end
      end

      # Start the poll:
      # 1) Grab the current PollInfo from the database indicating the last
      # time we ran
      # 2) On a loop, process all the recent updates between the last time
      # we ran and now.
      def start
        # Don't send asynchronously
        if Deimos.config.producers.backend == :kafka_async
          Deimos.config.producers.backend = :kafka
        end
        Deimos.config.logger.info('Starting...')
        @signal_to_stop = false
        retrieve_poll_info
        loop do
          if @signal_to_stop
            Deimos.config.logger.info('Shutting down')
            break
          end
          process_updates
          sleep 0.1
        end
      end

      # Grab the PollInfo or create if it doesn't exist. When starting from
      # the beginning, the watermark is initialized to Time.new(0) so the
      # first poll picks up all rows.
      def retrieve_poll_info
        ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?
        new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
        @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
                Deimos::PollInfo.create!(producer: @config.producer_class,
                                         last_sent: new_time,
                                         last_sent_id: 0)
      end

      # Stop the poll.
      def stop
        Deimos.config.logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Indicate whether this current loop should process updates. Most loops
      # will busy-wait (sleeping 0.1 seconds) until it's ready.
      # @return [Boolean]
      def should_run?
        Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
      end

      # Read the configured timestamp column off a polled record.
      # @param record [ActiveRecord::Base]
      # @return [ActiveSupport::TimeWithZone]
      def last_updated(record)
        record.public_send(@config.timestamp_column)
      end

      # Send messages for updated data. Batches are fetched repeatedly,
      # advancing time_from to the last record's timestamp, until a fetch
      # comes back empty.
      def process_updates
        return unless should_run?

        time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
        time_to = Time.zone.now - @config.delay_time
        Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
        message_count = 0
        batch_count = 0

        # poll_query gets all the relevant data from the database, as defined
        # by the producer itself.
        loop do
          Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
          batch = fetch_results(time_from, time_to).to_a
          break if batch.empty?

          batch_count += 1
          process_batch(batch)
          message_count += batch.size
          time_from = last_updated(batch.last)
        end
        # Fixed: this log line previously ended with a stray '}' instead of ')'.
        Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
      end

      # Build the relation for one batch, ordered by timestamp then primary
      # key so pagination via (last_sent, last_sent_id) is stable.
      # @param time_from [ActiveSupport::TimeWithZone]
      # @param time_to [ActiveSupport::TimeWithZone]
      # @return [ActiveRecord::Relation]
      def fetch_results(time_from, time_to)
        id = @producer.config[:record_class].primary_key
        quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
        quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
        @producer.poll_query(time_from: time_from,
                             time_to: time_to,
                             column_name: @config.timestamp_column,
                             min_id: @info.last_sent_id).
          limit(BATCH_SIZE).
          order("#{quoted_timestamp}, #{quoted_id}")
      end

      # Publish one batch and advance the persisted high-water mark to the
      # batch's last record.
      # @param batch [Array<ActiveRecord::Base>]
      def process_batch(batch)
        record = batch.last
        id_method = record.class.primary_key
        last_id = record.public_send(id_method)
        last_updated_at = last_updated(record)
        @producer.send_events(batch)
        @info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
        @info.save!
      end
    end
  end
end
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Deimos
  module Utils
    # Class which continually polls the kafka_messages table
    # in the database and sends Kafka messages.
    class DbProducer
      include Phobos::Producer
      attr_accessor :id, :current_topic

      # Max messages fetched/published per topic batch.
      BATCH_SIZE = 1000
      # Messages deleted per DELETE statement.
      DELETE_BATCH_SIZE = 10
      # Retries for transient DB errors during delete.
      MAX_DELETE_ATTEMPTS = 3

      # @param logger [Logger]
      def initialize(logger=Logger.new(STDOUT))
        @id = SecureRandom.uuid
        @logger = logger
        # Tagged logging is optional (e.g. ActiveSupport::TaggedLogging).
        @logger.push_tags("DbProducer #{@id}") if @logger.respond_to?(:push_tags)
      end

      # @return [Deimos::DbProducerConfig]
      def config
        Deimos.config.db_producer
      end

      # Start the poll: loop forever, processing all pending messages,
      # until #stop is called.
      def start
        @logger.info('Starting...')
        @signal_to_stop = false
        ActiveRecord::Base.connection.reconnect!
        loop do
          if @signal_to_stop
            @logger.info('Shutting down')
            break
          end
          send_pending_metrics
          process_next_messages
        end
      end

      # Stop the poll.
      def stop
        @logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Complete one loop of processing all messages in the DB.
      def process_next_messages
        topics = retrieve_topics
        @logger.info("Found topics: #{topics}")
        topics.each(&method(:process_topic))
        KafkaTopicInfo.ping_empty_topics(topics)
        sleep(0.5)
      end

      # @return [Array<String>] topics that currently have pending messages.
      def retrieve_topics
        KafkaMessage.select('distinct topic').map(&:topic).uniq
      end

      # @param topic [String]
      # @return [String, nil] the topic that was locked, or nil if none were.
      def process_topic(topic)
        # If the topic is already locked, another producer is currently
        # working on it. Move on to the next one.
        unless KafkaTopicInfo.lock(topic, @id)
          @logger.debug("Could not lock topic #{topic} - continuing")
          return
        end
        @current_topic = topic

        # Keep publishing batches until the topic is drained.
        loop { break unless process_topic_batch }

        KafkaTopicInfo.clear_lock(@current_topic, @id)
      rescue StandardError => e
        @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}")
        KafkaTopicInfo.register_error(@current_topic, @id)
        shutdown_producer
      end

      # Process a single batch in a topic.
      # @return [Boolean] true if a full batch was processed (more may remain).
      def process_topic_batch
        messages = retrieve_messages
        return false if messages.empty?

        batch_size = messages.size
        compacted_messages = compact_messages(messages)
        log_messages(compacted_messages)
        Deimos.instrument('db_producer.produce', topic: @current_topic, messages: compacted_messages) do
          begin
            produce_messages(compacted_messages.map(&:phobos_message))
          rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
            # Irrecoverably oversized batch: drop it so we don't wedge the topic.
            delete_messages(messages)
            @logger.error('Message batch too large, deleting...')
            @logger.error(Deimos::KafkaMessage.decoded(messages))
            raise
          end
        end
        delete_messages(messages)
        Deimos.config.metrics&.increment(
          'db_producer.process',
          tags: %W(topic:#{@current_topic}),
          by: messages.size
        )
        return false if batch_size < BATCH_SIZE

        KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive
        send_pending_metrics
        true
      end

      # Delete published messages, retrying transient MySQL lock/connection
      # errors up to MAX_DELETE_ATTEMPTS times.
      # @param messages [Array<Deimos::KafkaMessage>]
      def delete_messages(messages)
        attempts = 1
        begin
          messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
            Deimos::KafkaMessage.where(topic: batch.first.topic,
                                       id: batch.map(&:id)).
              delete_all
          end
        rescue StandardError => e
          if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
             attempts <= MAX_DELETE_ATTEMPTS
            attempts += 1
            ActiveRecord::Base.connection.verify!
            sleep(1)
            retry
          end
          raise
        end
      end

      # @return [Array<Deimos::KafkaMessage>] next batch for the current topic.
      def retrieve_messages
        KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
      end

      # Log the decoded messages at debug level if the topic is configured
      # for logging.
      # @param messages [Array<Deimos::KafkaMessage>]
      def log_messages(messages)
        return if config.log_topics != :all && !config.log_topics.include?(@current_topic)

        @logger.debug do
          decoded_messages = Deimos::KafkaMessage.decoded(messages)
          # Fixed: this message previously ended with a stray '}'.
          "DB producer: Topic #{@current_topic} Producing messages: #{decoded_messages}"
        end
      end

      # Send metrics to Datadog.
      def send_pending_metrics
        metrics = Deimos.config.metrics
        return unless metrics

        topics = KafkaTopicInfo.select(%w(topic last_processed_at))
        messages = Deimos::KafkaMessage.
          select('count(*) as num_messages, min(created_at) as earliest, topic').
          group(:topic).
          index_by(&:topic)
        topics.each do |record|
          message_record = messages[record.topic]
          # We want to record the last time we saw any activity, meaning either
          # the oldest message, or the last time we processed, whichever comes
          # last.
          if message_record
            record_earliest = message_record.earliest
            # SQLite gives a string here
            if record_earliest.is_a?(String)
              record_earliest = Time.zone.parse(record_earliest)
            end

            earliest = [record.last_processed_at, record_earliest].max
            time_diff = Time.zone.now - earliest
            metrics.gauge('pending_db_messages_max_wait', time_diff,
                          tags: ["topic:#{record.topic}"])
          else
            # no messages waiting
            metrics.gauge('pending_db_messages_max_wait', 0,
                          tags: ["topic:#{record.topic}"])
          end
          metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
                        tags: ["topic:#{record.topic}"])
        end
      end

      # Shut down the sync producer if we have to. Phobos will automatically
      # create a new one. We should call this if the producer can be in a bad
      # state and e.g. we need to clear the buffer.
      def shutdown_producer
        if self.class.producer.respond_to?(:sync_producer_shutdown) # Phobos 1.8.3
          self.class.producer.sync_producer_shutdown
        end
      end

      # Produce messages in batches, reducing the size 1/10 if the batch is too
      # large. Does not retry batches of messages that have already been sent.
      # @param batch [Array<Hash>]
      def produce_messages(batch)
        batch_size = batch.size
        current_index = 0
        begin
          batch[current_index..-1].in_groups_of(batch_size, false).each do |group|
            @logger.debug("Publishing #{group.size} messages to #{@current_topic}")
            producer.publish_list(group)
            Deimos.config.metrics&.increment(
              'publish',
              tags: %W(status:success topic:#{@current_topic}),
              by: group.size
            )
            current_index += group.size
            @logger.info("Sent #{group.size} messages to #{@current_topic}")
          end
        rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge,
               Kafka::RecordListTooLarge => e
          if batch_size == 1
            shutdown_producer
            raise
          end

          @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
          batch_size = if batch_size < 10
                         1
                       else
                         (batch_size / 10).to_i
                       end
          shutdown_producer
          retry
        end
      end

      # Drop superseded messages: keep only the latest message per key, but
      # only for topics configured for compaction and only when keys exist.
      # @param batch [Array<Deimos::KafkaMessage>]
      # @return [Array<Deimos::KafkaMessage>]
      def compact_messages(batch)
        return batch if batch.first&.key.blank?

        topic = batch.first.topic
        return batch if config.compact_topics != :all &&
                        !config.compact_topics.include?(topic)

        batch.reverse.uniq(&:key).reverse!
      end
    end
  end
end
|