deimos-temp-fork 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +83 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +333 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +349 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +286 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +1099 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-ruby.gemspec +44 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/ARCHITECTURE.md +140 -0
  22. data/docs/CONFIGURATION.md +236 -0
  23. data/docs/DATABASE_BACKEND.md +147 -0
  24. data/docs/INTEGRATION_TESTS.md +52 -0
  25. data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
  26. data/docs/UPGRADING.md +128 -0
  27. data/lib/deimos-temp-fork.rb +95 -0
  28. data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
  29. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  30. data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
  31. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  32. data/lib/deimos/active_record_consumer.rb +67 -0
  33. data/lib/deimos/active_record_producer.rb +87 -0
  34. data/lib/deimos/backends/base.rb +32 -0
  35. data/lib/deimos/backends/db.rb +41 -0
  36. data/lib/deimos/backends/kafka.rb +33 -0
  37. data/lib/deimos/backends/kafka_async.rb +33 -0
  38. data/lib/deimos/backends/test.rb +20 -0
  39. data/lib/deimos/batch_consumer.rb +7 -0
  40. data/lib/deimos/config/configuration.rb +381 -0
  41. data/lib/deimos/config/phobos_config.rb +137 -0
  42. data/lib/deimos/consume/batch_consumption.rb +150 -0
  43. data/lib/deimos/consume/message_consumption.rb +94 -0
  44. data/lib/deimos/consumer.rb +104 -0
  45. data/lib/deimos/instrumentation.rb +76 -0
  46. data/lib/deimos/kafka_message.rb +60 -0
  47. data/lib/deimos/kafka_source.rb +128 -0
  48. data/lib/deimos/kafka_topic_info.rb +102 -0
  49. data/lib/deimos/message.rb +79 -0
  50. data/lib/deimos/metrics/datadog.rb +47 -0
  51. data/lib/deimos/metrics/mock.rb +39 -0
  52. data/lib/deimos/metrics/provider.rb +36 -0
  53. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  54. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  55. data/lib/deimos/poll_info.rb +9 -0
  56. data/lib/deimos/producer.rb +224 -0
  57. data/lib/deimos/railtie.rb +8 -0
  58. data/lib/deimos/schema_backends/avro_base.rb +140 -0
  59. data/lib/deimos/schema_backends/avro_local.rb +30 -0
  60. data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
  61. data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
  62. data/lib/deimos/schema_backends/avro_validation.rb +21 -0
  63. data/lib/deimos/schema_backends/base.rb +150 -0
  64. data/lib/deimos/schema_backends/mock.rb +42 -0
  65. data/lib/deimos/shared_config.rb +63 -0
  66. data/lib/deimos/test_helpers.rb +360 -0
  67. data/lib/deimos/tracing/datadog.rb +35 -0
  68. data/lib/deimos/tracing/mock.rb +40 -0
  69. data/lib/deimos/tracing/provider.rb +29 -0
  70. data/lib/deimos/utils/db_poller.rb +150 -0
  71. data/lib/deimos/utils/db_producer.rb +243 -0
  72. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  73. data/lib/deimos/utils/inline_consumer.rb +150 -0
  74. data/lib/deimos/utils/lag_reporter.rb +175 -0
  75. data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
  76. data/lib/deimos/version.rb +5 -0
  77. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  78. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  79. data/lib/generators/deimos/active_record_generator.rb +79 -0
  80. data/lib/generators/deimos/db_backend/templates/migration +25 -0
  81. data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
  82. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  83. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  84. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  85. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  86. data/lib/tasks/deimos.rake +34 -0
  87. data/spec/active_record_batch_consumer_spec.rb +481 -0
  88. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  89. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  90. data/spec/active_record_consumer_spec.rb +154 -0
  91. data/spec/active_record_producer_spec.rb +85 -0
  92. data/spec/backends/base_spec.rb +10 -0
  93. data/spec/backends/db_spec.rb +54 -0
  94. data/spec/backends/kafka_async_spec.rb +11 -0
  95. data/spec/backends/kafka_spec.rb +11 -0
  96. data/spec/batch_consumer_spec.rb +256 -0
  97. data/spec/config/configuration_spec.rb +248 -0
  98. data/spec/consumer_spec.rb +209 -0
  99. data/spec/deimos_spec.rb +169 -0
  100. data/spec/generators/active_record_generator_spec.rb +56 -0
  101. data/spec/handlers/my_batch_consumer.rb +10 -0
  102. data/spec/handlers/my_consumer.rb +10 -0
  103. data/spec/kafka_listener_spec.rb +55 -0
  104. data/spec/kafka_source_spec.rb +381 -0
  105. data/spec/kafka_topic_info_spec.rb +111 -0
  106. data/spec/message_spec.rb +19 -0
  107. data/spec/phobos.bad_db.yml +73 -0
  108. data/spec/phobos.yml +77 -0
  109. data/spec/producer_spec.rb +498 -0
  110. data/spec/rake_spec.rb +19 -0
  111. data/spec/schema_backends/avro_base_shared.rb +199 -0
  112. data/spec/schema_backends/avro_local_spec.rb +32 -0
  113. data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
  114. data/spec/schema_backends/avro_validation_spec.rb +24 -0
  115. data/spec/schema_backends/base_spec.rb +33 -0
  116. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  117. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
  118. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  119. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  120. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  121. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  122. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  123. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  124. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  125. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  126. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  127. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  128. data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
  129. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  130. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  131. data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
  132. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  133. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  134. data/spec/spec_helper.rb +267 -0
  135. data/spec/utils/db_poller_spec.rb +320 -0
  136. data/spec/utils/db_producer_spec.rb +514 -0
  137. data/spec/utils/deadlock_retry_spec.rb +74 -0
  138. data/spec/utils/inline_consumer_spec.rb +31 -0
  139. data/spec/utils/lag_reporter_spec.rb +76 -0
  140. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  141. data/spec/utils/schema_controller_mixin_spec.rb +84 -0
  142. data/support/deimos-solo.png +0 -0
  143. data/support/deimos-with-name-next.png +0 -0
  144. data/support/deimos-with-name.png +0 -0
  145. data/support/flipp-logo.png +0 -0
  146. metadata +551 -0
@@ -0,0 +1,35 @@
1
# frozen_string_literal: true

require 'deimos/tracing/provider'

module Deimos
  module Tracing
    # Tracing wrapper class for Datadog.
    class Datadog < Tracing::Provider
      # @param config [Hash] tracing config; must contain :service_name.
      def initialize(config)
        service_name = config[:service_name]
        raise 'Tracing config must specify service_name' if service_name.nil?

        @service = service_name
      end

      # Start a Datadog trace and return the span.
      # @param span_name [String] the name of the span/trace.
      # @param options [Hash] may include :resource.
      # @return [Object] the Datadog span object.
      def start(span_name, options={})
        ::Datadog.tracer.trace(span_name).tap do |span|
          span.service = @service
          span.resource = options[:resource]
        end
      end

      # Finish the trace on the given span.
      # @param span [Object]
      def finish(span)
        span.finish
      end

      # Record an exception on the given span.
      # @param span [Object]
      # @param exception [Exception]
      def set_error(span, exception)
        span.set_error(exception)
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
# frozen_string_literal: true

require 'deimos/tracing/provider'

module Deimos
  module Tracing
    # Class that mocks out tracing functionality
    class Mock < Tracing::Provider
      # @param logger [Logger,nil] defaults to a new STDOUT logger.
      def initialize(logger=nil)
        @logger = logger || Logger.new(STDOUT)
        @logger.info('MockTracingProvider initialized')
      end

      # Log the start of a mock span and return a hash standing in for it.
      # @param span_name [String]
      # @return [Hash] mock span with :name and :started_at keys.
      def start(span_name, _options={})
        @logger.info("Mock span '#{span_name}' started")
        { name: span_name, started_at: Time.zone.now }
      end

      # Log the end of the mock span, including its duration window.
      # @param span [Hash] the hash returned by #start.
      def finish(span)
        finished_at = Time.zone.now
        @logger.info("Mock span '#{span[:name]}' finished: #{span[:started_at]} to #{finished_at}")
      end

      # Store the exception on the mock span and log it.
      # @param span [Hash] the hash returned by #start.
      # @param exception [Exception]
      def set_error(span, exception)
        span[:exception] = exception
        @logger.info("Mock span '#{span[:name]}' set an error: #{exception}")
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
# frozen_string_literal: true

module Deimos
  module Tracing
    # Base class for all tracing providers. Subclasses (e.g. Datadog, Mock)
    # must implement every method below; calling them here raises
    # NotImplementedError.
    class Provider
      # Returns a span object and starts the trace.
      # @param span_name [String] The name of the span/trace
      # @param options [Hash] Options for the span
      # @return [Object] The span object
      def start(span_name, options={})
        raise NotImplementedError
      end

      # Finishes the trace on the span object.
      # @param span [Object] The span to finish trace on
      def finish(span)
        raise NotImplementedError
      end

      # Set an error on the span.
      # @param span [Object] The span to set error on
      # @param exception [Exception] The exception that occurred
      def set_error(span, exception)
        raise NotImplementedError
      end
    end
  end
end
@@ -0,0 +1,150 @@
1
# frozen_string_literal: true

require 'deimos/poll_info'
require 'sigurd/executor'
require 'sigurd/signal_handler'

module Deimos
  module Utils
    # Class which continually polls the database and sends Kafka messages.
    class DbPoller
      # Maximum number of records fetched from the database per query.
      BATCH_SIZE = 1000

      # Needed for Executor so it can identify the worker
      attr_reader :id

      # Begin the DB Poller process: build one poller per configured
      # db_poller and run them all under a signal-handled executor.
      def self.start!
        if Deimos.config.db_poller_objects.empty?
          raise('No pollers configured!')
        end

        pollers = Deimos.config.db_poller_objects.map do |poller_config|
          self.new(poller_config)
        end
        executor = Sigurd::Executor.new(pollers,
                                        sleep_seconds: 5,
                                        logger: Deimos.config.logger)
        signal_handler = Sigurd::SignalHandler.new(executor)
        signal_handler.run!
      end

      # @param config [Deimos::Configuration::ConfigStruct]
      # @raise [RuntimeError] if the configured producer class does not exist
      #   or is not an ActiveRecordProducer.
      def initialize(config)
        @config = config
        @id = SecureRandom.hex
        begin
          @producer = @config.producer_class.constantize
        rescue NameError
          raise "Class #{@config.producer_class} not found!"
        end
        unless @producer < Deimos::ActiveRecordProducer
          # Fixed: @producer is itself a Class, so use its name directly;
          # the previous @producer.class.name always printed "Class".
          raise "Class #{@producer.name} is not an ActiveRecordProducer!"
        end
      end

      # Start the poll:
      # 1) Grab the current PollInfo from the database indicating the last
      # time we ran
      # 2) On a loop, process all the recent updates between the last time
      # we ran and now.
      def start
        # Don't send asynchronously
        if Deimos.config.producers.backend == :kafka_async
          Deimos.config.producers.backend = :kafka
        end
        Deimos.config.logger.info('Starting...')
        @signal_to_stop = false
        retrieve_poll_info
        loop do
          if @signal_to_stop
            Deimos.config.logger.info('Shutting down')
            break
          end
          process_updates
          sleep 0.1
        end
      end

      # Grab the PollInfo or create if it doesn't exist.
      def retrieve_poll_info
        ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?
        new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
        @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
                Deimos::PollInfo.create!(producer: @config.producer_class,
                                         last_sent: new_time,
                                         last_sent_id: 0)
      end

      # Stop the poll.
      def stop
        Deimos.config.logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Indicate whether this current loop should process updates. Most loops
      # will busy-wait (sleeping 0.1 seconds) until it's ready.
      # @return [Boolean]
      def should_run?
        Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
      end

      # @param record [ActiveRecord::Base]
      # @return [ActiveSupport::TimeWithZone]
      def last_updated(record)
        record.public_send(@config.timestamp_column)
      end

      # Send messages for updated data. Loops in batches of BATCH_SIZE,
      # advancing the time window from the last record of each batch, until
      # no more records are found.
      def process_updates
        return unless should_run?

        time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
        time_to = Time.zone.now - @config.delay_time
        Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
        message_count = 0
        batch_count = 0

        # poll_query gets all the relevant data from the database, as defined
        # by the producer itself.
        loop do
          Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
          batch = fetch_results(time_from, time_to).to_a
          break if batch.empty?

          batch_count += 1
          process_batch(batch)
          message_count += batch.size
          time_from = last_updated(batch.last)
        end
        # Fixed: closing parenthesis (was a stray "}") in the log message.
        Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
      end

      # @param time_from [ActiveSupport::TimeWithZone]
      # @param time_to [ActiveSupport::TimeWithZone]
      # @return [ActiveRecord::Relation]
      def fetch_results(time_from, time_to)
        id = @producer.config[:record_class].primary_key
        quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
        quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
        @producer.poll_query(time_from: time_from,
                             time_to: time_to,
                             column_name: @config.timestamp_column,
                             min_id: @info.last_sent_id).
          limit(BATCH_SIZE).
          order("#{quoted_timestamp}, #{quoted_id}")
      end

      # Send one batch of records and persist the high-water mark
      # (timestamp and id of the last record) so the next poll resumes
      # where this one left off.
      # @param batch [Array<ActiveRecord::Base>]
      def process_batch(batch)
        record = batch.last
        id_method = record.class.primary_key
        last_id = record.public_send(id_method)
        last_updated_at = last_updated(record)
        @producer.send_events(batch)
        @info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
        @info.save!
      end
    end
  end
end
@@ -0,0 +1,243 @@
1
# frozen_string_literal: true

module Deimos
  module Utils
    # Class which continually polls the kafka_messages table
    # in the database and sends Kafka messages.
    class DbProducer
      include Phobos::Producer
      attr_accessor :id, :current_topic

      # Max number of messages retrieved from the DB per batch.
      BATCH_SIZE = 1000
      # Messages are deleted in groups of this size to keep deletes small.
      DELETE_BATCH_SIZE = 10
      # Number of times to retry a failed delete before giving up.
      MAX_DELETE_ATTEMPTS = 3

      # @param logger [Logger]
      def initialize(logger=Logger.new(STDOUT))
        @id = SecureRandom.uuid
        @logger = logger
        @logger.push_tags("DbProducer #{@id}") if @logger.respond_to?(:push_tags)
      end

      # @return [Deimos::DbProducerConfig]
      def config
        Deimos.config.db_producer
      end

      # Start the poll.
      def start
        @logger.info('Starting...')
        @signal_to_stop = false
        ActiveRecord::Base.connection.reconnect!
        loop do
          if @signal_to_stop
            @logger.info('Shutting down')
            break
          end
          send_pending_metrics
          process_next_messages
        end
      end

      # Stop the poll.
      def stop
        @logger.info('Received signal to stop')
        @signal_to_stop = true
      end

      # Complete one loop of processing all messages in the DB.
      def process_next_messages
        topics = retrieve_topics
        @logger.info("Found topics: #{topics}")
        topics.each(&method(:process_topic))
        KafkaTopicInfo.ping_empty_topics(topics)
        sleep(0.5)
      end

      # @return [Array<String>] all topics that currently have pending messages.
      def retrieve_topics
        KafkaMessage.select('distinct topic').map(&:topic).uniq
      end

      # @param topic [String]
      # @return [String, nil] the topic that was locked, or nil if none were.
      def process_topic(topic)
        # If the topic is already locked, another producer is currently
        # working on it. Move on to the next one.
        unless KafkaTopicInfo.lock(topic, @id)
          @logger.debug("Could not lock topic #{topic} - continuing")
          return
        end
        @current_topic = topic

        loop { break unless process_topic_batch }

        KafkaTopicInfo.clear_lock(@current_topic, @id)
      rescue StandardError => e
        @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}")
        KafkaTopicInfo.register_error(@current_topic, @id)
        shutdown_producer
      end

      # Process a single batch in a topic.
      # @return [Boolean] true if there may be more messages to process.
      def process_topic_batch
        messages = retrieve_messages
        return false if messages.empty?

        batch_size = messages.size
        compacted_messages = compact_messages(messages)
        log_messages(compacted_messages)
        Deimos.instrument('db_producer.produce', topic: @current_topic, messages: compacted_messages) do
          begin
            produce_messages(compacted_messages.map(&:phobos_message))
          rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge, Kafka::RecordListTooLarge
            # The batch can never be sent - delete it so the topic doesn't
            # block forever, then re-raise for visibility.
            delete_messages(messages)
            @logger.error('Message batch too large, deleting...')
            @logger.error(Deimos::KafkaMessage.decoded(messages))
            raise
          end
        end
        delete_messages(messages)
        Deimos.config.metrics&.increment(
          'db_producer.process',
          tags: %W(topic:#{@current_topic}),
          by: messages.size
        )
        return false if batch_size < BATCH_SIZE

        KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive
        send_pending_metrics
        true
      end

      # Delete the given messages in small groups, retrying a bounded number
      # of times on lock-wait or lost-connection errors.
      # @param messages [Array<Deimos::KafkaMessage>]
      def delete_messages(messages)
        attempts = 1
        begin
          messages.in_groups_of(DELETE_BATCH_SIZE, false).each do |batch|
            Deimos::KafkaMessage.where(topic: batch.first.topic,
                                       id: batch.map(&:id)).
              delete_all
          end
        rescue StandardError => e
          if (e.message =~ /Lock wait/i || e.message =~ /Lost connection/i) &&
             attempts <= MAX_DELETE_ATTEMPTS
            attempts += 1
            ActiveRecord::Base.connection.verify!
            sleep(1)
            retry
          end
          raise
        end
      end

      # @return [Array<Deimos::KafkaMessage>] next batch for the current topic.
      def retrieve_messages
        KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
      end

      # Log the decoded messages if the current topic is configured for
      # logging.
      # @param messages [Array<Deimos::KafkaMessage>]
      def log_messages(messages)
        return if config.log_topics != :all && !config.log_topics.include?(@current_topic)

        @logger.debug do
          decoded_messages = Deimos::KafkaMessage.decoded(messages)
          # Fixed: removed the stray trailing "}" from the log line.
          "DB producer: Topic #{@current_topic} Producing messages: #{decoded_messages}"
        end
      end

      # Send metrics to Datadog.
      def send_pending_metrics
        metrics = Deimos.config.metrics
        return unless metrics

        topics = KafkaTopicInfo.select(%w(topic last_processed_at))
        messages = Deimos::KafkaMessage.
          select('count(*) as num_messages, min(created_at) as earliest, topic').
          group(:topic).
          index_by(&:topic)
        topics.each do |record|
          message_record = messages[record.topic]
          # We want to record the last time we saw any activity, meaning either
          # the oldest message, or the last time we processed, whichever comes
          # last.
          if message_record
            record_earliest = message_record.earliest
            # SQLite gives a string here
            if record_earliest.is_a?(String)
              record_earliest = Time.zone.parse(record_earliest)
            end

            earliest = [record.last_processed_at, record_earliest].max
            time_diff = Time.zone.now - earliest
            metrics.gauge('pending_db_messages_max_wait', time_diff,
                          tags: ["topic:#{record.topic}"])
          else
            # no messages waiting
            metrics.gauge('pending_db_messages_max_wait', 0,
                          tags: ["topic:#{record.topic}"])
          end
          metrics.gauge('pending_db_messages_count', message_record&.num_messages || 0,
                        tags: ["topic:#{record.topic}"])
        end
      end

      # Shut down the sync producer if we have to. Phobos will automatically
      # create a new one. We should call this if the producer can be in a bad
      # state and e.g. we need to clear the buffer.
      def shutdown_producer
        if self.class.producer.respond_to?(:sync_producer_shutdown) # Phobos 1.8.3
          self.class.producer.sync_producer_shutdown
        end
      end

      # Produce messages in batches, reducing the size 1/10 if the batch is too
      # large. Does not retry batches of messages that have already been sent.
      # @param batch [Array<Hash>]
      def produce_messages(batch)
        batch_size = batch.size
        current_index = 0
        begin
          batch[current_index..-1].in_groups_of(batch_size, false).each do |group|
            @logger.debug("Publishing #{group.size} messages to #{@current_topic}")
            producer.publish_list(group)
            Deimos.config.metrics&.increment(
              'publish',
              tags: %W(status:success topic:#{@current_topic}),
              by: group.size
            )
            current_index += group.size
            @logger.info("Sent #{group.size} messages to #{@current_topic}")
          end
        rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge,
               Kafka::RecordListTooLarge => e
          # A single message that is still too large can never succeed.
          if batch_size == 1
            shutdown_producer
            raise
          end

          @logger.error("Got error #{e.class.name} when publishing #{batch.size} in groups of #{batch_size}, retrying...")
          batch_size = if batch_size < 10
                         1
                       else
                         (batch_size / 10).to_i
                       end
          shutdown_producer
          retry
        end
      end

      # Drop all but the last message for each key, preserving order, when
      # the topic is configured for compaction. Keyless batches are returned
      # unchanged.
      # @param batch [Array<Deimos::KafkaMessage>]
      # @return [Array<Deimos::KafkaMessage>]
      def compact_messages(batch)
        return batch if batch.first&.key.blank?

        topic = batch.first.topic
        return batch if config.compact_topics != :all &&
                        !config.compact_topics.include?(topic)

        batch.reverse.uniq(&:key).reverse!
      end
    end
  end
end