deimos-kafka 1.0.0.pre.beta15

Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +9 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +742 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos.rb +134 -0
  24. data/lib/deimos/active_record_consumer.rb +81 -0
  25. data/lib/deimos/active_record_producer.rb +64 -0
  26. data/lib/deimos/avro_data_coder.rb +89 -0
  27. data/lib/deimos/avro_data_decoder.rb +36 -0
  28. data/lib/deimos/avro_data_encoder.rb +51 -0
  29. data/lib/deimos/backends/db.rb +27 -0
  30. data/lib/deimos/backends/kafka.rb +27 -0
  31. data/lib/deimos/backends/kafka_async.rb +27 -0
  32. data/lib/deimos/configuration.rb +88 -0
  33. data/lib/deimos/consumer.rb +164 -0
  34. data/lib/deimos/instrumentation.rb +71 -0
  35. data/lib/deimos/kafka_message.rb +27 -0
  36. data/lib/deimos/kafka_source.rb +126 -0
  37. data/lib/deimos/kafka_topic_info.rb +79 -0
  38. data/lib/deimos/message.rb +74 -0
  39. data/lib/deimos/metrics/datadog.rb +47 -0
  40. data/lib/deimos/metrics/mock.rb +39 -0
  41. data/lib/deimos/metrics/provider.rb +38 -0
  42. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  43. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  44. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  45. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  46. data/lib/deimos/producer.rb +218 -0
  47. data/lib/deimos/publish_backend.rb +30 -0
  48. data/lib/deimos/railtie.rb +8 -0
  49. data/lib/deimos/schema_coercer.rb +108 -0
  50. data/lib/deimos/shared_config.rb +59 -0
  51. data/lib/deimos/test_helpers.rb +356 -0
  52. data/lib/deimos/tracing/datadog.rb +35 -0
  53. data/lib/deimos/tracing/mock.rb +40 -0
  54. data/lib/deimos/tracing/provider.rb +31 -0
  55. data/lib/deimos/utils/db_producer.rb +95 -0
  56. data/lib/deimos/utils/executor.rb +117 -0
  57. data/lib/deimos/utils/inline_consumer.rb +144 -0
  58. data/lib/deimos/utils/lag_reporter.rb +182 -0
  59. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  60. data/lib/deimos/utils/signal_handler.rb +68 -0
  61. data/lib/deimos/version.rb +5 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +17 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +117 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +208 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/lib/deimos/tracing/datadog.rb
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+
+ require 'deimos/tracing/provider'
+
+ module Deimos
+   module Tracing
+     # Tracing wrapper class for Datadog.
+     class Datadog < Tracing::Provider
+       # :nodoc:
+       def initialize(config)
+         raise 'Tracing config must specify service_name' if config[:service_name].nil?
+
+         @service = config[:service_name]
+       end
+
+       # :nodoc:
+       def start(span_name, options={})
+         span = ::Datadog.tracer.trace(span_name)
+         span.service = @service
+         span.resource = options[:resource]
+         span
+       end
+
+       # :nodoc:
+       def finish(span)
+         span.finish
+       end
+
+       # :nodoc:
+       def set_error(span, exception)
+         span.set_error(exception)
+       end
+     end
+   end
+ end
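
For reference, a minimal sketch of driving this provider directly, outside of Deimos' own instrumentation. The span and resource names are illustrative, and ddtrace must be loaded for ::Datadog.tracer to exist:

    require 'ddtrace'
    require 'deimos/tracing/datadog'

    tracer = Deimos::Tracing::Datadog.new(service_name: 'my-service')
    # 'produce.message' and 'MyProducer' are hypothetical names.
    span = tracer.start('produce.message', resource: 'MyProducer')
    begin
      # ... publish a message ...
    rescue StandardError => e
      tracer.set_error(span, e)
      raise
    ensure
      tracer.finish(span)
    end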
data/lib/deimos/tracing/mock.rb
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ require 'deimos/tracing/provider'
+
+ module Deimos
+   module Tracing
+     # Class that mocks out tracing functionality
+     class Mock < Tracing::Provider
+       # :nodoc:
+       def initialize(logger=nil)
+         @logger = logger || Logger.new(STDOUT)
+         @logger.info('MockTracingProvider initialized')
+       end
+
+       # :nodoc:
+       def start(span_name, _options={})
+         @logger.info("Mock span '#{span_name}' started")
+         {
+           name: span_name,
+           started_at: Time.zone.now
+         }
+       end
+
+       # :nodoc:
+       def finish(span)
+         name = span[:name]
+         start = span[:started_at]
+         finish = Time.zone.now
+         @logger.info("Mock span '#{name}' finished: #{start} to #{finish}")
+       end
+
+       # :nodoc:
+       def set_error(span, exception)
+         span[:exception] = exception
+         name = span[:name]
+         @logger.info("Mock span '#{name}' set an error: #{exception}")
+       end
+     end
+   end
+ end
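
A minimal sketch of exercising the mock in a spec, assuming an ActiveSupport environment where Time.zone is set (the class calls Time.zone.now); the span name and error are arbitrary:

    require 'deimos/tracing/mock'

    tracer = Deimos::Tracing::Mock.new(Logger.new(STDOUT))
    span = tracer.start('decode_message') # returns a plain Hash
    tracer.set_error(span, RuntimeError.new('schema mismatch'))
    tracer.finish(span)
    # Since spans are plain hashes, specs can assert on
    # span[:name] and span[:exception] directly.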
data/lib/deimos/tracing/provider.rb
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ # rubocop:disable Lint/UnusedMethodArgument
+ module Deimos
+   module Tracing
+     # Base class for all tracing providers.
+     class Provider
+       # Returns a span object and starts the trace.
+       # @param span_name [String] The name of the span/trace
+       # @param options [Hash] Options for the span
+       # @return [Object] The span object
+       def start(span_name, options={})
+         raise NotImplementedError
+       end
+
+       # Finishes the trace on the span object.
+       # @param span [Object] The span to finish the trace on
+       def finish(span)
+         raise NotImplementedError
+       end
+
+       # Set an error on the span.
+       # @param span [Object] The span to set the error on
+       # @param exception [Exception] The exception that occurred
+       def set_error(span, exception)
+         raise NotImplementedError
+       end
+     end
+   end
+ end
+ # rubocop:enable Lint/UnusedMethodArgument
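
Since the base class only defines the three hooks above, a custom provider is a small subclass. This no-op variant is a hypothetical sketch, not part of the gem:

    require 'deimos/tracing/provider'

    # Hypothetical provider that records spans as plain hashes and discards them.
    class NullTracingProvider < Deimos::Tracing::Provider
      def start(span_name, options={})
        { name: span_name, resource: options[:resource] }
      end

      def finish(_span); end

      def set_error(span, exception)
        span[:error] = exception
      end
    end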
data/lib/deimos/utils/db_producer.rb
@@ -0,0 +1,95 @@
+ # frozen_string_literal: true
+
+ module Deimos
+   module Utils
+     # Class which continually polls the database and sends Kafka messages.
+     class DbProducer
+       include Phobos::Producer
+       attr_accessor :id, :current_topic
+
+       BATCH_SIZE = 1000
+
+       # @param logger [Logger]
+       def initialize(logger=Logger.new(STDOUT))
+         @id = SecureRandom.uuid
+         @logger = logger
+         @logger.push_tags("DbProducer #{@id}") if @logger.respond_to?(:push_tags)
+       end
+
+       # Start the poll.
+       def start
+         @logger.info('Starting...')
+         @signal_to_stop = false
+         loop do
+           if @signal_to_stop
+             @logger.info('Shutting down')
+             break
+           end
+           process_next_messages
+         end
+       end
+
+       # Stop the poll.
+       def stop
+         @logger.info('Received signal to stop')
+         @signal_to_stop = true
+       end
+
+       # Complete one loop of processing all messages in the DB.
+       def process_next_messages
+         topics = retrieve_topics
+         @logger.info("Found topics: #{topics}")
+         topics.each(&method(:process_topic))
+         sleep(0.5)
+       end
+
+       # @return [Array<String>]
+       def retrieve_topics
+         KafkaMessage.select('distinct topic').map(&:topic).uniq
+       end
+
+       # @param topic [String]
+       # @return [String] the topic that was locked, or nil if it could not be locked.
+       def process_topic(topic)
+         # If the topic is already locked, another producer is currently
+         # working on it. Move on to the next one.
+         unless KafkaTopicInfo.lock(topic, @id)
+           @logger.debug("Could not lock topic #{topic} - continuing")
+           return
+         end
+         @current_topic = topic
+         messages = retrieve_messages
+
+         while messages.any?
+           produce_messages(messages.map(&:phobos_message))
+           messages.first.class.where(id: messages.map(&:id)).delete_all
+           break if messages.size < BATCH_SIZE
+
+           KafkaTopicInfo.heartbeat(@current_topic, @id) # keep alive
+           messages = retrieve_messages
+         end
+         KafkaTopicInfo.clear_lock(@current_topic, @id)
+       rescue StandardError => e
+         @logger.error("Error processing messages for topic #{@current_topic}: #{e.class.name}: #{e.message} #{e.backtrace.join("\n")}")
+         KafkaTopicInfo.register_error(@current_topic, @id)
+       end
+
+       # @return [Array<KafkaMessage>]
+       def retrieve_messages
+         KafkaMessage.where(topic: @current_topic).order(:id).limit(BATCH_SIZE)
+       end
+
+       # @param batch [Array<Hash>]
+       def produce_messages(batch)
+         @logger.debug("Publishing #{batch.size} messages to #{@current_topic}")
+         producer.publish_list(batch)
+         Deimos.metrics&.increment(
+           'publish',
+           tags: %W(status:success topic:#{@current_topic}),
+           by: batch.size
+         )
+         @logger.info("Sent #{batch.size} messages to #{@current_topic}")
+       end
+     end
+   end
+ end
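
A minimal sketch of running a producer in its own thread; in practice the gem's rake task (data/lib/tasks/deimos.rake) is expected to manage this, so the threading here is illustrative only:

    require 'deimos/utils/db_producer'

    producer = Deimos::Utils::DbProducer.new(Rails.logger)
    thread = Thread.new { producer.start } # polls KafkaMessage roughly every 0.5s
    # ... on shutdown:
    producer.stop # sets the flag; the loop exits after the current iteration
    thread.join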
data/lib/deimos/utils/executor.rb
@@ -0,0 +1,117 @@
+ # frozen_string_literal: true
+
+ # rubocop:disable Lint/RescueException
+ module Deimos
+   module Utils
+     # Mostly copied from Phobos::Executor. We should DRY this up by putting in a
+     # PR to make it more generic. Might even make sense to move to a separate
+     # gem.
+     class Executor
+       # @return [Array<#start, #stop, #id>]
+       attr_accessor :runners
+
+       # @param runners [Array<#start, #stop, #id>] A list of objects that can be
+       #   started or stopped.
+       # @param logger [Logger]
+       def initialize(runners, logger=Logger.new(STDOUT))
+         @threads = Concurrent::Array.new
+         @runners = runners
+         @logger = logger
+       end
+
+       # Start the executor.
+       def start
+         @logger.info('Starting executor')
+         @signal_to_stop = false
+         @threads.clear
+         @thread_pool = Concurrent::FixedThreadPool.new(@runners.size)
+
+         @runners.each do |runner|
+           @thread_pool.post do
+             thread = Thread.current
+             thread.abort_on_exception = true
+             @threads << thread
+             run_object(runner)
+           end
+         end
+
+         true
+       end
+
+       # Stop the executor.
+       def stop
+         return if @signal_to_stop
+
+         @logger.info('Stopping executor')
+         @signal_to_stop = true
+         @runners.each(&:stop)
+         @threads.select(&:alive?).each do |thread|
+           begin
+             thread.wakeup
+           rescue StandardError
+             nil
+           end
+         end
+         @thread_pool&.shutdown
+         @thread_pool&.wait_for_termination
+         @logger.info('Executor stopped')
+       end
+
+       private
+
+       # @param exception [Exception]
+       # @return [Hash]
+       def error_metadata(exception)
+         {
+           exception_class: exception.class.name,
+           exception_message: exception.message,
+           backtrace: exception.backtrace
+         }
+       end
+
+       def run_object(runner)
+         retry_count = 0
+
+         begin
+           @logger.info("Running #{runner.id}")
+           runner.start
+           retry_count = 0 # success - reset retry count
+         rescue Exception => e
+           handle_crashed_runner(runner, e, retry_count)
+           retry_count += 1
+           retry unless @signal_to_stop
+         end
+       rescue Exception => e
+         @logger.error("Failed to run listener (#{e.message}) #{error_metadata(e)}")
+         raise e
+       end
+
+       # @return [ExponentialBackoff]
+       def create_exponential_backoff
+         min = 1
+         max = 60
+         ExponentialBackoff.new(min, max).tap do |backoff|
+           backoff.randomize_factor = rand
+         end
+       end
+
+       # When "runner#start" is interrupted / crashes we assume it's
+       # safe to be called again
+       def handle_crashed_runner(runner, error, retry_count)
+         backoff = create_exponential_backoff
+         interval = backoff.interval_at(retry_count).round(2)
+
+         metadata = {
+           listener_id: runner.id,
+           retry_count: retry_count,
+           waiting_time: interval
+         }.merge(error_metadata(error))
+
+         @logger.error("Runner crashed, waiting #{interval}s (#{error.message}) #{metadata}")
+         sleep(interval)
+       end
+     end
+   end
+ end
+
+ # rubocop:enable Lint/RescueException
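
A minimal sketch of pairing the executor with DbProducer runners, which satisfy the #start/#stop/#id contract above; the runner count and the shutdown handling are illustrative (the gem's own wiring lives in lib/deimos/utils/signal_handler.rb):

    require 'deimos/utils/executor'
    require 'deimos/utils/db_producer'

    runners = Array.new(2) { Deimos::Utils::DbProducer.new }
    executor = Deimos::Utils::Executor.new(runners)
    executor.start # returns true immediately; work runs on pool threads
    begin
      sleep
    rescue Interrupt
      executor.stop # stops each runner, wakes the threads, drains the pool
    end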
data/lib/deimos/utils/inline_consumer.rb
@@ -0,0 +1,144 @@
+ # frozen_string_literal: true
+
+ # Class to consume messages. Can be used with integration testing frameworks.
+ # Assumes that you have a topic with only one partition.
+ module Deimos
+   module Utils
+     # Listener that can seek to get the last X messages in a topic.
+     class SeekListener < Phobos::Listener
+       attr_accessor :num_messages
+
+       # :nodoc:
+       def start_listener
+         @num_messages ||= 10
+         @consumer = create_kafka_consumer
+         @consumer.subscribe(topic, @subscribe_opts)
+
+         begin
+           last_offset = @kafka_client.last_offset_for(topic, 0)
+           offset = last_offset - num_messages
+           if offset.positive?
+             Deimos.config.logger.info("Seeking to #{offset}")
+             @consumer.seek(topic, 0, offset)
+           end
+         rescue StandardError => e
+           Deimos.config.logger.error("Could not seek to offset: #{e.message}")
+         end
+
+         instrument('listener.start_handler', listener_metadata) do
+           @handler_class.start(@kafka_client)
+         end
+         log_info('Listener started', listener_metadata)
+       end
+     end
+
+     # Class to return the messages consumed.
+     class MessageBankHandler < Deimos::Consumer
+       include Phobos::Handler
+
+       cattr_accessor :total_messages
+
+       # @param klass [Class < Deimos::Consumer]
+       def self.config_class=(klass)
+         self.config.merge!(klass.config)
+       end
+
+       # :nodoc:
+       def self.start(_kafka_client)
+         self.total_messages = []
+       end
+
+       # :nodoc:
+       def consume(payload, metadata)
+         puts "Got #{payload}"
+         self.class.total_messages << {
+           key: metadata[:key],
+           payload: payload
+         }
+       end
+     end
+
+     # Class which can process/consume messages inline.
+     class InlineConsumer
+       MAX_MESSAGE_WAIT_TIME = 1.second
+       MAX_TOPIC_WAIT_TIME = 10.seconds
+
+       # Get the last X messages from a topic. You can specify a subclass of
+       # Deimos::Consumer or Deimos::Producer, or provide the
+       # schema, namespace and key_config directly.
+       # @param topic [String]
+       # @param config_class [Class < Deimos::Consumer | Deimos::Producer]
+       # @param schema [String]
+       # @param namespace [String]
+       # @param key_config [Hash]
+       # @param num_messages [Integer]
+       # @return [Array<Hash>]
+       def self.get_messages_for(topic:, schema: nil, namespace: nil, key_config: nil,
+                                 config_class: nil, num_messages: 10)
+         if config_class
+           MessageBankHandler.config_class = config_class
+         elsif schema.nil? || key_config.nil?
+           raise 'You must specify either a config_class or a schema, namespace and key_config!'
+         else
+           MessageBankHandler.class_eval do
+             schema schema
+             namespace namespace
+             key_config key_config
+             @decoder = nil
+             @key_decoder = nil
+           end
+         end
+         self.consume(topic: topic,
+                      frk_consumer: MessageBankHandler,
+                      num_messages: num_messages)
+         messages = MessageBankHandler.total_messages
+         messages.size <= num_messages ? messages : messages[-num_messages..-1]
+       end
+
+       # Consume the last X messages from a topic.
+       # @param topic [String]
+       # @param frk_consumer [Class]
+       # @param num_messages [Integer] If this number is >= the number
+       #   of messages in the topic, all messages will be consumed.
+       def self.consume(topic:, frk_consumer:, num_messages: 10)
+         listener = SeekListener.new(
+           handler: frk_consumer,
+           group_id: SecureRandom.hex,
+           topic: topic,
+           heartbeat_interval: 1
+         )
+         listener.num_messages = num_messages
+
+         # Add the start_time and last_message_time attributes to the
+         # consumer class so we can kill it if it's gone on too long
+         class << frk_consumer
+           attr_accessor :start_time, :last_message_time
+         end
+
+         subscribers = []
+         subscribers << ActiveSupport::Notifications.
+           subscribe('phobos.listener.process_message') do
+             frk_consumer.last_message_time = Time.zone.now
+           end
+         subscribers << ActiveSupport::Notifications.
+           subscribe('phobos.listener.start_handler') do
+             frk_consumer.start_time = Time.zone.now
+             frk_consumer.last_message_time = nil
+           end
+         subscribers << ActiveSupport::Notifications.
+           subscribe('heartbeat.consumer.kafka') do
+             if frk_consumer.last_message_time
+               if Time.zone.now - frk_consumer.last_message_time > MAX_MESSAGE_WAIT_TIME
+                 raise Phobos::AbortError
+               end
+             elsif Time.zone.now - frk_consumer.start_time > MAX_TOPIC_WAIT_TIME
+               Deimos.config.logger.error('Aborting - initial wait too long')
+               raise Phobos::AbortError
+             end
+           end
+         listener.start
+         subscribers.each { |s| ActiveSupport::Notifications.unsubscribe(s) }
+       end
+     end
+   end
+ end
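
A minimal sketch of fetching the tail of a single-partition topic in an integration test; the topic, schema, namespace and key_config values here are hypothetical:

    messages = Deimos::Utils::InlineConsumer.get_messages_for(
      topic: 'my-topic',
      schema: 'MySchema',
      namespace: 'com.my-namespace',
      key_config: { field: 'test_id' },
      num_messages: 5
    )
    messages.each { |m| puts "#{m[:key]} => #{m[:payload]}" }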