deimos-temp-fork 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +83 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +333 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +349 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +286 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +1099 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-ruby.gemspec +44 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/ARCHITECTURE.md +140 -0
  22. data/docs/CONFIGURATION.md +236 -0
  23. data/docs/DATABASE_BACKEND.md +147 -0
  24. data/docs/INTEGRATION_TESTS.md +52 -0
  25. data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
  26. data/docs/UPGRADING.md +128 -0
  27. data/lib/deimos-temp-fork.rb +95 -0
  28. data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
  29. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  30. data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
  31. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  32. data/lib/deimos/active_record_consumer.rb +67 -0
  33. data/lib/deimos/active_record_producer.rb +87 -0
  34. data/lib/deimos/backends/base.rb +32 -0
  35. data/lib/deimos/backends/db.rb +41 -0
  36. data/lib/deimos/backends/kafka.rb +33 -0
  37. data/lib/deimos/backends/kafka_async.rb +33 -0
  38. data/lib/deimos/backends/test.rb +20 -0
  39. data/lib/deimos/batch_consumer.rb +7 -0
  40. data/lib/deimos/config/configuration.rb +381 -0
  41. data/lib/deimos/config/phobos_config.rb +137 -0
  42. data/lib/deimos/consume/batch_consumption.rb +150 -0
  43. data/lib/deimos/consume/message_consumption.rb +94 -0
  44. data/lib/deimos/consumer.rb +104 -0
  45. data/lib/deimos/instrumentation.rb +76 -0
  46. data/lib/deimos/kafka_message.rb +60 -0
  47. data/lib/deimos/kafka_source.rb +128 -0
  48. data/lib/deimos/kafka_topic_info.rb +102 -0
  49. data/lib/deimos/message.rb +79 -0
  50. data/lib/deimos/metrics/datadog.rb +47 -0
  51. data/lib/deimos/metrics/mock.rb +39 -0
  52. data/lib/deimos/metrics/provider.rb +36 -0
  53. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  54. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  55. data/lib/deimos/poll_info.rb +9 -0
  56. data/lib/deimos/producer.rb +224 -0
  57. data/lib/deimos/railtie.rb +8 -0
  58. data/lib/deimos/schema_backends/avro_base.rb +140 -0
  59. data/lib/deimos/schema_backends/avro_local.rb +30 -0
  60. data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
  61. data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
  62. data/lib/deimos/schema_backends/avro_validation.rb +21 -0
  63. data/lib/deimos/schema_backends/base.rb +150 -0
  64. data/lib/deimos/schema_backends/mock.rb +42 -0
  65. data/lib/deimos/shared_config.rb +63 -0
  66. data/lib/deimos/test_helpers.rb +360 -0
  67. data/lib/deimos/tracing/datadog.rb +35 -0
  68. data/lib/deimos/tracing/mock.rb +40 -0
  69. data/lib/deimos/tracing/provider.rb +29 -0
  70. data/lib/deimos/utils/db_poller.rb +150 -0
  71. data/lib/deimos/utils/db_producer.rb +243 -0
  72. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  73. data/lib/deimos/utils/inline_consumer.rb +150 -0
  74. data/lib/deimos/utils/lag_reporter.rb +175 -0
  75. data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
  76. data/lib/deimos/version.rb +5 -0
  77. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  78. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  79. data/lib/generators/deimos/active_record_generator.rb +79 -0
  80. data/lib/generators/deimos/db_backend/templates/migration +25 -0
  81. data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
  82. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  83. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  84. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  85. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  86. data/lib/tasks/deimos.rake +34 -0
  87. data/spec/active_record_batch_consumer_spec.rb +481 -0
  88. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  89. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  90. data/spec/active_record_consumer_spec.rb +154 -0
  91. data/spec/active_record_producer_spec.rb +85 -0
  92. data/spec/backends/base_spec.rb +10 -0
  93. data/spec/backends/db_spec.rb +54 -0
  94. data/spec/backends/kafka_async_spec.rb +11 -0
  95. data/spec/backends/kafka_spec.rb +11 -0
  96. data/spec/batch_consumer_spec.rb +256 -0
  97. data/spec/config/configuration_spec.rb +248 -0
  98. data/spec/consumer_spec.rb +209 -0
  99. data/spec/deimos_spec.rb +169 -0
  100. data/spec/generators/active_record_generator_spec.rb +56 -0
  101. data/spec/handlers/my_batch_consumer.rb +10 -0
  102. data/spec/handlers/my_consumer.rb +10 -0
  103. data/spec/kafka_listener_spec.rb +55 -0
  104. data/spec/kafka_source_spec.rb +381 -0
  105. data/spec/kafka_topic_info_spec.rb +111 -0
  106. data/spec/message_spec.rb +19 -0
  107. data/spec/phobos.bad_db.yml +73 -0
  108. data/spec/phobos.yml +77 -0
  109. data/spec/producer_spec.rb +498 -0
  110. data/spec/rake_spec.rb +19 -0
  111. data/spec/schema_backends/avro_base_shared.rb +199 -0
  112. data/spec/schema_backends/avro_local_spec.rb +32 -0
  113. data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
  114. data/spec/schema_backends/avro_validation_spec.rb +24 -0
  115. data/spec/schema_backends/base_spec.rb +33 -0
  116. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  117. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
  118. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  119. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  120. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  121. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  122. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  123. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  124. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  125. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  126. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  127. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  128. data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
  129. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  130. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  131. data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
  132. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  133. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  134. data/spec/spec_helper.rb +267 -0
  135. data/spec/utils/db_poller_spec.rb +320 -0
  136. data/spec/utils/db_producer_spec.rb +514 -0
  137. data/spec/utils/deadlock_retry_spec.rb +74 -0
  138. data/spec/utils/inline_consumer_spec.rb +31 -0
  139. data/spec/utils/lag_reporter_spec.rb +76 -0
  140. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  141. data/spec/utils/schema_controller_mixin_spec.rb +84 -0
  142. data/support/deimos-solo.png +0 -0
  143. data/support/deimos-with-name-next.png +0 -0
  144. data/support/deimos-with-name.png +0 -0
  145. data/support/flipp-logo.png +0 -0
  146. metadata +551 -0
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/tracing/provider'
4
+
5
module Deimos
  module Tracing
    # Tracing wrapper class for Datadog.
    class Datadog < Tracing::Provider
      # @param config [Hash] tracing configuration; must contain a
      #   :service_name key naming the Datadog service to report under.
      # @raise [RuntimeError] if :service_name is missing.
      def initialize(config)
        service = config[:service_name]
        raise 'Tracing config must specify service_name' if service.nil?

        @service = service
      end

      # Begin a Datadog trace span, tagging it with the configured service
      # and the optional resource.
      # @param span_name [String] the name of the span/trace.
      # @param options [Hash] :resource is assigned to the span.
      # @return [Object] the Datadog span object.
      def start(span_name, options={})
        ::Datadog.tracer.trace(span_name).tap do |span|
          span.service = @service
          span.resource = options[:resource]
        end
      end

      # Close out the given span.
      # @param span [Object]
      def finish(span)
        span.finish
      end

      # Attach an exception to the span.
      # @param span [Object]
      # @param exception [Exception]
      def set_error(span, exception)
        span.set_error(exception)
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/tracing/provider'
4
+
5
module Deimos
  module Tracing
    # Class that mocks out tracing functionality
    class Mock < Tracing::Provider
      # @param logger [Logger, nil] destination for mock span messages;
      #   defaults to a new STDOUT logger when nil.
      def initialize(logger=nil)
        @logger = logger || Logger.new(STDOUT)
        @logger.info('MockTracingProvider initialized')
      end

      # Start a fake span, represented as a plain Hash.
      # @param span_name [String]
      # @return [Hash] mock span carrying the name and start time.
      def start(span_name, _options={})
        @logger.info("Mock span '#{span_name}' started")
        { name: span_name, started_at: Time.zone.now }
      end

      # Log the lifetime of the mock span.
      # @param span [Hash] span hash returned by #start.
      def finish(span)
        @logger.info("Mock span '#{span[:name]}' finished: #{span[:started_at]} to #{Time.zone.now}")
      end

      # Record an exception on the mock span and log it.
      # @param span [Hash] span hash returned by #start.
      # @param exception [Exception]
      def set_error(span, exception)
        span[:exception] = exception
        @logger.info("Mock span '#{span[:name]}' set an error: #{exception}")
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Deimos
  module Tracing
    # Base class for all tracing providers. Subclasses must implement
    # #start, #finish and #set_error.
    class Provider
      # Returns a span object and starts the trace.
      # @param span_name [String] The name of the span/trace
      # @param options [Hash] Options for the span
      # @return [Object] The span object
      # @raise [NotImplementedError] always; subclasses must override.
      def start(span_name, options={})
        # Include the class name so a misconfigured provider is easy to spot.
        raise NotImplementedError, "#{self.class.name} must implement #start"
      end

      # Finishes the trace on the span object.
      # @param span [Object] The span to finish trace on
      # @raise [NotImplementedError] always; subclasses must override.
      def finish(span)
        raise NotImplementedError, "#{self.class.name} must implement #finish"
      end

      # Set an error on the span.
      # @param span [Object] The span to set error on
      # @param exception [Exception] The exception that occurred
      # @raise [NotImplementedError] always; subclasses must override.
      def set_error(span, exception)
        raise NotImplementedError, "#{self.class.name} must implement #set_error"
      end
    end
  end
end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/poll_info'
4
+ require 'sigurd/executor'
5
+ require 'sigurd/signal_handler'
6
+
7
module Deimos
  module Utils
    # Class which continually polls the database and sends Kafka messages
    # via a configured ActiveRecordProducer.
    class DbPoller
      # Maximum number of records to fetch and publish per batch.
      BATCH_SIZE = 1000

      # Needed for Executor so it can identify the worker
      attr_reader :id

      # Begin the DB Poller process: build one poller per configured entry
      # and run them all under a signal-aware executor (blocks until a
      # shutdown signal is received).
      def self.start!
        if Deimos.config.db_poller_objects.empty?
          raise('No pollers configured!')
        end

        pollers = Deimos.config.db_poller_objects.map do |poller_config|
          self.new(poller_config)
        end
        executor = Sigurd::Executor.new(pollers,
                                        sleep_seconds: 5,
                                        logger: Deimos.config.logger)
        signal_handler = Sigurd::SignalHandler.new(executor)
        signal_handler.run!
      end

      # @param config [Deimos::Configuration::ConfigStruct]
      # @raise [RuntimeError] if the configured producer class cannot be
      #   resolved or is not an ActiveRecordProducer.
      def initialize(config)
        @config = config
        @id = SecureRandom.hex
        begin
          @producer = @config.producer_class.constantize
        rescue NameError
          raise "Class #{@config.producer_class} not found!"
        end
        unless @producer < Deimos::ActiveRecordProducer
          # Fixed: `@producer.class.name` always printed "Class" because
          # @producer is itself a Class; report its own name instead.
          raise "Class #{@producer.name} is not an ActiveRecordProducer!"
        end
      end

      # Start the poll:
      # 1) Grab the current PollInfo from the database indicating the last
      # time we ran
      # 2) On a loop, process all the recent updates between the last time
      # we ran and now.
      def start
        # Don't send asynchronously
        if Deimos.config.producers.backend == :kafka_async
          Deimos.config.producers.backend = :kafka
        end
        Deimos.config.logger.info('Starting...')
        @signal_to_stop = false
        retrieve_poll_info
        loop do
          if @signal_to_stop
            Deimos.config.logger.info('Shutting down')
            break
          end
          process_updates
          sleep 0.1
        end
      end

      # Grab the PollInfo record for this producer, or create it if it
      # doesn't exist yet.
      def retrieve_poll_info
        ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?
        new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
        @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
                Deimos::PollInfo.create!(producer: @config.producer_class,
                                         last_sent: new_time,
                                         last_sent_id: 0)
      end

      # Stop the poll.
      def stop
        Deimos.config.logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Indicate whether this current loop should process updates. Most loops
      # will busy-wait (sleeping 0.1 seconds) until it's ready.
      # @return [Boolean]
      def should_run?
        Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
      end

      # @param record [ActiveRecord::Base]
      # @return [ActiveSupport::TimeWithZone] the value of the configured
      #   timestamp column on the record.
      def last_updated(record)
        record.public_send(@config.timestamp_column)
      end

      # Send messages for updated data.
      def process_updates
        return unless should_run?

        time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
        time_to = Time.zone.now - @config.delay_time
        Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
        message_count = 0
        batch_count = 0

        # poll_query gets all the relevant data from the database, as defined
        # by the producer itself.
        loop do
          Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
          batch = fetch_results(time_from, time_to).to_a
          break if batch.empty?

          batch_count += 1
          process_batch(batch)
          message_count += batch.size
          time_from = last_updated(batch.last)
        end
        # Fixed: message previously ended with a stray "}" instead of
        # closing the parenthesis.
        Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
      end

      # @param time_from [ActiveSupport::TimeWithZone]
      # @param time_to [ActiveSupport::TimeWithZone]
      # @return [ActiveRecord::Relation]
      def fetch_results(time_from, time_to)
        id = @producer.config[:record_class].primary_key
        quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
        quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
        @producer.poll_query(time_from: time_from,
                             time_to: time_to,
                             column_name: @config.timestamp_column,
                             min_id: @info.last_sent_id).
          limit(BATCH_SIZE).
          order("#{quoted_timestamp}, #{quoted_id}")
      end

      # Publish a batch, then advance the PollInfo bookmark to the last
      # record's id/timestamp so subsequent polls resume after it.
      # @param batch [Array<ActiveRecord::Base>]
      def process_batch(batch)
        record = batch.last
        id_method = record.class.primary_key
        last_id = record.public_send(id_method)
        last_updated_at = last_updated(record)
        @producer.send_events(batch)
        @info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
        @info.save!
      end
    end
  end
end
@@ -0,0 +1,243 @@
1
+ # frozen_string_literal: true
2
+
3
module Deimos
  module Utils
    # Class which continually polls the kafka_messages table
    # in the database and sends Kafka messages.
    class DbProducer
      include Phobos::Producer
      attr_accessor :id, :current_topic

      # Maximum number of messages fetched/published per batch.
      BATCH_SIZE = 1000
      # Messages are deleted from the DB in groups of this size.
      DELETE_BATCH_SIZE = 10
      # Max retries for a failed delete before re-raising.
      MAX_DELETE_ATTEMPTS = 3

      # @param logger [Logger]
      def initialize(logger=Logger.new(STDOUT))
        @id = SecureRandom.uuid
        # Tag all log lines with this producer's id when supported
        # (e.g. ActiveSupport::TaggedLogging).
        @logger = logger
        @logger.push_tags("DbProducer #{@id}") if @logger.respond_to?(:push_tags)
      end

      # @return [Deimos::DbProducerConfig]
      def config
        Deimos.config.db_producer
      end

      # Start the poll loop; runs until #stop is called.
      def start
        @logger.info('Starting...')
        @signal_to_stop = false
        ActiveRecord::Base.connection.reconnect!
        loop do
          if @signal_to_stop
            @logger.info('Shutting down')
            break
          end
          send_pending_metrics
          process_next_messages
        end
      end

      # Stop the poll.
      def stop
        @logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Complete one loop of processing all messages in the DB.
      def process_next_messages
        topics = retrieve_topics
        @logger.info("Found topics: #{topics}")
        topics.each(&method(:process_topic))
        KafkaTopicInfo.ping_empty_topics(topics)
        sleep(0.5)
      end

      # @return [Array<String>] distinct topics that currently have messages.
      def retrieve_topics
        KafkaMessage.select('distinct topic').map(&:topic).uniq
      end

      # @param topic [String]
      # @return [String, nil] the topic that was locked, or nil if none were.
      def process_topic(topic)
        # If the topic is already locked, another producer is currently
        # working on it. Move on to the next one.
        unless KafkaTopicInfo.lock(topic, @id)
          @logger.debug("Could not lock topic #{topic} - continuing")
          return
        end
        @current_topic = topic

        loop { break unless process_topic_batch }

        KafkaTopicInfo.clear_lock(@current_topic, @id)
      rescue StandardError => e
        @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}")
        KafkaTopicInfo.register_error(@current_topic, @id)
        shutdown_producer
      end

      # Process a single batch in a topic.
      # @return [Boolean] true if a full batch was processed (more may remain).
      def process_topic_batch
        messages = retrieve_messages
        return false if messages.empty?

        batch_size = messages.size
        compacted_messages = compact_messages(messages)
        log_messages(compacted_messages)
        Deimos.instrument('db_producer.produce', topic: @current_topic, messages: compacted_messages) do
          begin
            produce_messages(compacted_messages.map(&:phobos_message))
          rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
            # The batch cannot ever be sent; drop it so the topic isn't
            # blocked forever, log what was lost, and re-raise.
            delete_messages(messages)
            @logger.error('Message batch too large, deleting...')
            @logger.error(Deimos::KafkaMessage.decoded(messages))
            raise
          end
        end
        delete_messages(messages)
        Deimos.config.metrics&.increment(
          'db_producer.process',
          tags: %W(topic:#{@current_topic}),
          by: messages.size
        )
        return false if batch_size < BATCH_SIZE

        KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive
        send_pending_metrics
        true
      end

      # Delete published messages from the DB, retrying transient lock /
      # connection errors up to MAX_DELETE_ATTEMPTS times.
      # @param messages [Array<Deimos::KafkaMessage>]
      def delete_messages(messages)
        attempts = 1
        begin
          messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
            Deimos::KafkaMessage.where(topic: batch.first.topic,
                                       id: batch.map(&:id)).
              delete_all
          end
        rescue StandardError => e
          if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
             attempts <= MAX_DELETE_ATTEMPTS
            attempts += 1
            ActiveRecord::Base.connection.verify!
            sleep(1)
            retry
          end
          raise
        end
      end

      # @return [Array<Deimos::KafkaMessage>] next batch for the current topic.
      def retrieve_messages
        KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
      end

      # Log the decoded messages if the topic is configured for logging.
      # @param messages [Array<Deimos::KafkaMessage>]
      def log_messages(messages)
        return if config.log_topics != :all && !config.log_topics.include?(@current_topic)

        @logger.debug do
          decoded_messages = Deimos::KafkaMessage.decoded(messages)
          # Fixed: removed stray trailing "}" from the log message.
          "DB producer: Topic #{@current_topic} Producing messages: #{decoded_messages}"
        end
      end

      # Send metrics to Datadog.
      def send_pending_metrics
        metrics = Deimos.config.metrics
        return unless metrics

        topics = KafkaTopicInfo.select(%w(topic last_processed_at))
        messages = Deimos::KafkaMessage.
          select('count(*) as num_messages, min(created_at) as earliest, topic').
          group(:topic).
          index_by(&:topic)
        topics.each do |record|
          message_record = messages[record.topic]
          # We want to record the last time we saw any activity, meaning either
          # the oldest message, or the last time we processed, whichever comes
          # last.
          if message_record
            record_earliest = message_record.earliest
            # SQLite gives a string here
            if record_earliest.is_a?(String)
              record_earliest = Time.zone.parse(record_earliest)
            end

            earliest = [record.last_processed_at, record_earliest].max
            time_diff = Time.zone.now - earliest
            metrics.gauge('pending_db_messages_max_wait', time_diff,
                          tags: ["topic:#{record.topic}"])
          else
            # no messages waiting
            metrics.gauge('pending_db_messages_max_wait', 0,
                          tags: ["topic:#{record.topic}"])
          end
          metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
                        tags: ["topic:#{record.topic}"])
        end
      end

      # Shut down the sync producer if we have to. Phobos will automatically
      # create a new one. We should call this if the producer can be in a bad
      # state and e.g. we need to clear the buffer.
      def shutdown_producer
        if self.class.producer.respond_to?(:sync_producer_shutdown) # Phobos 1.8.3
          self.class.producer.sync_producer_shutdown
        end
      end

      # Produce messages in batches, reducing the size 1/10 if the batch is too
      # large. Does not retry batches of messages that have already been sent.
      # @param batch [Array<Hash>]
      def produce_messages(batch)
        batch_size = batch.size
        current_index = 0
        begin
          batch[current_index..-1].in_groups_of(batch_size, false).each do |group|
            @logger.debug("Publishing #{group.size} messages to #{@current_topic}")
            producer.publish_list(group)
            Deimos.config.metrics&.increment(
              'publish',
              tags: %W(status:success topic:#{@current_topic}),
              by: group.size
            )
            # Track progress so a retry never re-sends delivered messages.
            current_index += group.size
            @logger.info("Sent #{group.size} messages to #{@current_topic}")
          end
        rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge,
               Kafka::RecordListTooLarge => e
          if batch_size == 1
            # Can't shrink further - a single message is too large; give up.
            shutdown_producer
            raise
          end

          @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
          batch_size = if batch_size < 10
                         1
                       else
                         (batch_size / 10).to_i
                       end
          shutdown_producer
          retry
        end
      end

      # Keep only the latest message per key (Kafka compaction semantics),
      # preserving relative ordering, when compaction applies to this topic.
      # @param batch [Array<Deimos::KafkaMessage>]
      # @return [Array<Deimos::KafkaMessage>]
      def compact_messages(batch)
        return batch if batch.first&.key.blank?

        topic = batch.first.topic
        return batch if config.compact_topics != :all &&
                        !config.compact_topics.include?(topic)

        batch.reverse.uniq(&:key).reverse!
      end
    end
  end
end