deimos-kafka 1.0.0.pre.beta15

Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +9 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +742 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos.rb +134 -0
  24. data/lib/deimos/active_record_consumer.rb +81 -0
  25. data/lib/deimos/active_record_producer.rb +64 -0
  26. data/lib/deimos/avro_data_coder.rb +89 -0
  27. data/lib/deimos/avro_data_decoder.rb +36 -0
  28. data/lib/deimos/avro_data_encoder.rb +51 -0
  29. data/lib/deimos/backends/db.rb +27 -0
  30. data/lib/deimos/backends/kafka.rb +27 -0
  31. data/lib/deimos/backends/kafka_async.rb +27 -0
  32. data/lib/deimos/configuration.rb +88 -0
  33. data/lib/deimos/consumer.rb +164 -0
  34. data/lib/deimos/instrumentation.rb +71 -0
  35. data/lib/deimos/kafka_message.rb +27 -0
  36. data/lib/deimos/kafka_source.rb +126 -0
  37. data/lib/deimos/kafka_topic_info.rb +79 -0
  38. data/lib/deimos/message.rb +74 -0
  39. data/lib/deimos/metrics/datadog.rb +47 -0
  40. data/lib/deimos/metrics/mock.rb +39 -0
  41. data/lib/deimos/metrics/provider.rb +38 -0
  42. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  43. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  44. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  45. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  46. data/lib/deimos/producer.rb +218 -0
  47. data/lib/deimos/publish_backend.rb +30 -0
  48. data/lib/deimos/railtie.rb +8 -0
  49. data/lib/deimos/schema_coercer.rb +108 -0
  50. data/lib/deimos/shared_config.rb +59 -0
  51. data/lib/deimos/test_helpers.rb +356 -0
  52. data/lib/deimos/tracing/datadog.rb +35 -0
  53. data/lib/deimos/tracing/mock.rb +40 -0
  54. data/lib/deimos/tracing/provider.rb +31 -0
  55. data/lib/deimos/utils/db_producer.rb +95 -0
  56. data/lib/deimos/utils/executor.rb +117 -0
  57. data/lib/deimos/utils/inline_consumer.rb +144 -0
  58. data/lib/deimos/utils/lag_reporter.rb +182 -0
  59. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  60. data/lib/deimos/utils/signal_handler.rb +68 -0
  61. data/lib/deimos/version.rb +5 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +17 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +117 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +208 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0

data/lib/deimos/consumer.rb
@@ -0,0 +1,164 @@
+ # frozen_string_literal: true
+
+ require 'deimos/avro_data_decoder'
+ require 'deimos/shared_config'
+ require 'phobos/handler'
+ require 'active_support/all'
+ require 'ddtrace'
+
+ # Class to consume messages coming from the pipeline topic
+ # Note: According to the docs, instances of your handler will be created
+ # for every incoming message. This class should be lightweight.
+ module Deimos
+   # Parent consumer class.
+   class Consumer
+     include Phobos::Handler
+     include SharedConfig
+
+     class << self
+       # @return [AvroDataDecoder]
+       def decoder
+         @decoder ||= AvroDataDecoder.new(schema: config[:schema],
+                                          namespace: config[:namespace])
+       end
+
+       # @return [AvroDataDecoder]
+       def key_decoder
+         @key_decoder ||= AvroDataDecoder.new(schema: config[:key_schema],
+                                              namespace: config[:namespace])
+       end
+     end
+
+     # :nodoc:
+     def around_consume(payload, metadata)
+       _received_message(payload, metadata)
+       benchmark = Benchmark.measure do
+         _with_error_span(payload, metadata) { yield }
+       end
+       _handle_success(benchmark.real, payload, metadata)
+     end
+
+     # :nodoc:
+     def before_consume(payload, metadata)
+       _with_error_span(payload, metadata) do
+         if self.class.config[:key_schema] || self.class.config[:key_field]
+           metadata[:key] = decode_key(metadata[:key])
+         end
+         self.class.decoder.decode(payload) if payload.present?
+       end
+     end
+
+     # Helper method to decode an Avro-encoded key.
+     # @param key [String]
+     # @return [Object] the decoded key.
+     def decode_key(key)
+       return nil if key.nil?
+
+       config = self.class.config
+       if config[:encode_key] && config[:key_field].nil? &&
+          config[:key_schema].nil?
+         raise 'No key config given - if you are not decoding keys, please use `key_config plain: true`'
+       end
+
+       if config[:key_field]
+         self.class.decoder.decode_key(key, config[:key_field])
+       elsif config[:key_schema]
+         self.class.key_decoder.decode(key, schema: config[:key_schema])
+       else # no encoding
+         key
+       end
+     end
+
+     # Consume incoming messages.
+     # @param _payload [String]
+     # @param _metadata [Hash]
+     def consume(_payload, _metadata)
+       raise NotImplementedError
+     end
+
+     private
+
+     # @param payload [Hash|String]
+     # @param metadata [Hash]
+     def _with_error_span(payload, metadata)
+       @span = Deimos.config.tracer&.start(
+         'deimos-consumer',
+         resource: self.class.name.gsub('::', '-')
+       )
+       yield
+     rescue StandardError => e
+       _handle_error(e, payload, metadata)
+     ensure
+       Deimos.config.tracer&.finish(@span)
+     end
+
+     def _received_message(payload, metadata)
+       Deimos.config.logger.info(
+         message: 'Got Kafka event',
+         payload: payload,
+         metadata: metadata
+       )
+       Deimos.config.metrics&.increment('handler', tags: %W(
+                                          status:received
+                                          topic:#{metadata[:topic]}
+                                        ))
+     end
+
+     # @param exception [Throwable]
+     # @param payload [Hash]
+     # @param metadata [Hash]
+     def _handle_error(exception, payload, metadata)
+       Deimos.config.tracer&.set_error(@span, exception)
+       Deimos.config.metrics&.increment(
+         'handler',
+         tags: %W(
+           status:error
+           topic:#{metadata[:topic]}
+         )
+       )
+       Deimos.config.logger.warn(
+         message: 'Error consuming message',
+         handler: self.class.name,
+         metadata: metadata,
+         data: payload,
+         error_message: exception.message,
+         error: exception.backtrace
+       )
+       raise if Deimos.config.reraise_consumer_errors
+     end
+
+     # @param time_taken [Float]
+     # @param payload [Hash]
+     # @param metadata [Hash]
+     def _handle_success(time_taken, payload, metadata)
+       Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
+                                          time:consume
+                                          topic:#{metadata[:topic]}
+                                        ))
+       Deimos.config.metrics&.increment('handler', tags: %W(
+                                          status:success
+                                          topic:#{metadata[:topic]}
+                                        ))
+       Deimos.config.logger.info(
+         message: 'Finished processing Kafka event',
+         payload: payload,
+         time_elapsed: time_taken,
+         metadata: metadata
+       )
+       return if payload.nil? || payload['timestamp'].blank?
+
+       begin
+         time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
+       rescue ArgumentError
+         Deimos.config.logger.info(
+           message: "Error parsing timestamp! #{payload['timestamp']}"
+         )
+         return
+       end
+       Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
+                                          time:time_delayed
+                                          topic:#{metadata[:topic]}
+                                        ))
+     end
+   end
+ end
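
For orientation, a working consumer is just a subclass that implements `consume`. The sketch below is illustrative: the `schema`, `namespace` and `key_config` class macros come from `SharedConfig` (defined elsewhere in this gem), and the schema and field names are placeholders.

class MyConsumer < Deimos::Consumer
  schema 'MySchema'
  namespace 'com.my-namespace'
  key_config field: 'test_id'

  # `payload` arrives already Avro-decoded by before_consume; metadata[:key]
  # holds the decoded key.
  def consume(payload, metadata)
    Deimos.config.logger.info(message: 'Handled event', key: metadata[:key], payload: payload)
  end
end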

data/lib/deimos/instrumentation.rb
@@ -0,0 +1,71 @@
+ # frozen_string_literal: true
+
+ require 'active_support/notifications'
+ require 'active_support/concern'
+
+ # :nodoc:
+ module Deimos
+   # Copied from Phobos instrumentation.
+   module Instrumentation
+     extend ActiveSupport::Concern
+     NAMESPACE = 'Deimos'
+
+     # :nodoc:
+     module ClassMethods
+       # :nodoc:
+       def subscribe(event)
+         ActiveSupport::Notifications.subscribe("#{NAMESPACE}.#{event}") do |*args|
+           yield(ActiveSupport::Notifications::Event.new(*args)) if block_given?
+         end
+       end
+
+       # :nodoc:
+       def unsubscribe(subscriber)
+         ActiveSupport::Notifications.unsubscribe(subscriber)
+       end
+
+       # :nodoc:
+       def instrument(event, extra={})
+         ActiveSupport::Notifications.instrument("#{NAMESPACE}.#{event}", extra) do |extra2|
+           yield(extra2) if block_given?
+         end
+       end
+     end
+   end
+
+   include Instrumentation
+
+   # This module listens to events published by RubyKafka.
+   module KafkaListener
+     # Listens for any exceptions that happen during publishing and re-publishes
+     # as a Deimos event.
+     # @param event [ActiveSupport::Notification]
+     def self.send_produce_error(event)
+       exception = event.payload[:exception_object]
+       return if !exception || !exception.respond_to?(:failed_messages)
+
+       messages = exception.failed_messages
+       messages.group_by(&:topic).each do |topic, batch|
+         next if batch.empty?
+
+         producer = batch.first.metadata[:producer_name]
+         payloads = batch.map { |m| m.metadata[:decoded_payload] }
+
+         Deimos.metrics&.count('publish_error', payloads.size,
+                               tags: %W(topic:#{topic}))
+         Deimos.instrument(
+           'produce_error',
+           producer: producer,
+           topic: topic,
+           exception_object: exception,
+           payloads: payloads
+         )
+       end
+     end
+   end
+
+   ActiveSupport::Notifications.subscribe('deliver_messages.producer.kafka') do |*args|
+     event = ActiveSupport::Notifications::Event.new(*args)
+     KafkaListener.send_produce_error(event)
+   end
+ end
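
As a usage sketch, an application can listen for the re-published 'produce_error' event through the `subscribe` helper defined above; the block body here is illustrative.

Deimos.subscribe('produce_error') do |event|
  # The payload carries :producer, :topic, :exception_object and :payloads,
  # as instrumented above.
  Deimos.config.logger.error(
    message: "Failed to produce #{event.payload[:payloads].size} message(s)",
    topic: event.payload[:topic]
  )
end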

data/lib/deimos/kafka_message.rb
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ module Deimos
+   # Store Kafka messages into the database.
+   class KafkaMessage < ActiveRecord::Base
+     self.table_name = 'kafka_messages'
+
+     validates_presence_of :message, :topic
+
+     # Ensure it gets turned into a string, e.g. for testing purposes. It
+     # should already be a string.
+     # @param mess [Object]
+     def message=(mess)
+       write_attribute(:message, mess.to_s)
+     end
+
+     # @return [Hash]
+     def phobos_message
+       {
+         payload: self.message,
+         partition_key: self.partition_key,
+         key: self.key,
+         topic: self.topic
+       }
+     end
+   end
+ end
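
Illustrative only, assuming the topic/key/partition_key columns created by the db_backend migration: this is how a stored row maps back to a Phobos-style message hash.

record = Deimos::KafkaMessage.create!(
  topic: 'my-topic',
  key: '123',
  partition_key: '123',
  message: '{"test_id":"123"}'
)
record.phobos_message
# => { payload: '{"test_id":"123"}', partition_key: '123', key: '123', topic: 'my-topic' }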

data/lib/deimos/kafka_source.rb
@@ -0,0 +1,126 @@
+ # frozen_string_literal: true
+
+ module Deimos
+   # Represents an object which needs to inform Kafka when it is saved or
+   # bulk imported.
+   module KafkaSource
+     extend ActiveSupport::Concern
+
+     included do
+       after_create(:send_kafka_event_on_create)
+       after_update(:send_kafka_event_on_update)
+       after_destroy(:send_kafka_event_on_destroy)
+     end
+
+     # Send the newly created model to Kafka.
+     def send_kafka_event_on_create
+       return unless self.persisted?
+       return unless self.class.kafka_config[:create]
+
+       self.class.kafka_producers.each { |p| p.send_event(self) }
+     end
+
+     # Send the newly updated model to Kafka.
+     def send_kafka_event_on_update
+       return unless self.class.kafka_config[:update]
+
+       producers = self.class.kafka_producers
+       fields = producers.flat_map(&:watched_attributes).uniq
+       fields -= ['updated_at']
+       # Only send an event if a field we care about was changed.
+       any_changes = fields.any? do |field|
+         field_change = self.previous_changes[field]
+         field_change.present? && field_change[0] != field_change[1]
+       end
+       return unless any_changes
+
+       producers.each { |p| p.send_event(self) }
+     end
+
+     # Send a deletion (null payload) event to Kafka.
+     def send_kafka_event_on_destroy
+       return unless self.class.kafka_config[:delete]
+
+       self.class.kafka_producers.each { |p| p.send_event(self.deletion_payload) }
+     end
+
+     # Payload to send after we are destroyed.
+     # @return [Hash]
+     def deletion_payload
+       { payload_key: self[self.class.primary_key] }
+     end
+
+     # :nodoc:
+     module ClassMethods
+       # @return [Hash]
+       def kafka_config
+         {
+           update: true,
+           delete: true,
+           import: true,
+           create: true
+         }
+       end
+
+       # @return [Array<Deimos::ActiveRecordProducer>] the producers to run.
+       def kafka_producers
+         raise NotImplementedError if self.method(:kafka_producer).
+           owner == Deimos::KafkaSource
+
+         [self.kafka_producer]
+       end
+
+       # Deprecated - use #kafka_producers instead.
+       # @return [Deimos::ActiveRecordProducer] the producer to use.
+       def kafka_producer
+         raise NotImplementedError if self.method(:kafka_producers).
+           owner == Deimos::KafkaSource
+
+         self.kafka_producers.first
+       end
+
+       # This is an internal method, part of the activerecord_import gem. It's
+       # the one that actually does the importing, having already normalized
+       # the inputs (arrays, hashes, records etc.)
+       # Basically we want to first do the import, then reload the records
+       # and send them to Kafka.
+       def import_without_validations_or_callbacks(column_names,
+                                                   array_of_attributes,
+                                                   options={})
+         results = super
+         return unless self.kafka_config[:import]
+         return if array_of_attributes.empty?
+
+         # Figure out the IDs of the records that were just imported.
+         ids = if results.is_a?(Array)
+                 results[1]
+               elsif results.respond_to?(:ids)
+                 results.ids
+               else
+                 []
+               end
+         if ids.blank?
+           # re-fill IDs based on what was just entered into the DB.
+           if self.connection.adapter_name.downcase =~ /sqlite/
+             last_id = self.connection.select_value('select last_insert_rowid()')
+             ids = ((last_id - array_of_attributes.size + 1)..last_id).to_a
+           else # mysql
+             last_id = self.connection.select_value('select LAST_INSERT_ID()')
+             ids = (last_id..(last_id + array_of_attributes.size)).to_a
+           end
+         end
+         # Build an array of hashes, where each hash is the actual attribute
+         # hash that created the object, and send them all to Kafka.
+         array_of_hashes = []
+         array_of_attributes.each_with_index do |array, i|
+           hash = column_names.zip(array).to_h.with_indifferent_access
+           hash[self.primary_key] = ids[i] if hash[self.primary_key].blank?
+           array_of_hashes << hash
+         end
+
+         self.kafka_producers.each { |p| p.send_events(array_of_hashes) }
+         results
+       end
+     end
+   end
+ end
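
A sketch of a model opting in to the concern. `WidgetProducer` is a hypothetical Deimos::ActiveRecordProducer subclass; overriding `kafka_producers` is required because the default above raises NotImplementedError.

class Widget < ActiveRecord::Base
  include Deimos::KafkaSource

  # Route creates, updates, deletes and bulk imports through one producer.
  def self.kafka_producers
    [WidgetProducer]
  end
end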

data/lib/deimos/kafka_topic_info.rb
@@ -0,0 +1,79 @@
+ # frozen_string_literal: true
+
+ module Deimos
+   # Record that keeps track of which topics are being worked on by DbProducers.
+   class KafkaTopicInfo < ActiveRecord::Base
+     self.table_name = 'kafka_topic_info'
+
+     class << self
+       # Lock a topic for the given ID. Returns whether the lock was successful.
+       # @param topic [String]
+       # @param lock_id [String]
+       # @return [Boolean]
+       def lock(topic, lock_id)
+         # Try to create it - it's fine if it already exists
+         begin
+           self.create(topic: topic)
+         rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/HandleExceptions
+           # continue on
+         end
+
+         # Lock the record
+         qtopic = self.connection.quote(topic)
+         qlock_id = self.connection.quote(lock_id)
+         qtable = self.connection.quote_table_name('kafka_topic_info')
+         qnow = self.connection.quote(Time.zone.now.to_s(:db))
+         qfalse = self.connection.quoted_false
+         qtime = self.connection.quote(1.minute.ago.to_s(:db))
+
+         # If a record is marked as error and less than 1 minute old,
+         # we don't want to pick it up even if not currently locked because
+         # we worry we'll run into the same problem again.
+         # Once it's more than 1 minute old, we figure it's OK to try again
+         # so we can pick up any topic that's that old, even if it was
+         # locked by someone, because it's the job of the producer to keep
+         # updating the locked_at timestamp as they work on messages in that
+         # topic. If the locked_at timestamp is that old, chances are that
+         # the producer crashed.
+         sql = <<~SQL
+           UPDATE #{qtable}
+           SET locked_by=#{qlock_id}, locked_at=#{qnow}, error=#{qfalse}
+           WHERE topic=#{qtopic} AND
+            ((locked_by IS NULL AND error=#{qfalse}) OR locked_at < #{qtime})
+         SQL
+         self.connection.update(sql)
+         self.where(locked_by: lock_id, topic: topic).any?
+       end
+
+       # This is called once a producer is finished working on a topic, i.e.
+       # there are no more messages to fetch. It unlocks the topic and
+       # moves on to the next one.
+       # @param topic [String]
+       # @param lock_id [String]
+       def clear_lock(topic, lock_id)
+         self.where(topic: topic, locked_by: lock_id).
+           update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
+       end
+
+       # The producer calls this if it gets an error sending messages. This
+       # essentially locks down this topic for 1 minute (for all producers)
+       # and allows the caller to continue to the next topic.
+       # @param topic [String]
+       # @param lock_id [String]
+       def register_error(topic, lock_id)
+         record = self.where(topic: topic, locked_by: lock_id).last
+         record.update(locked_by: nil, locked_at: Time.zone.now, error: true,
+                       retries: record.retries + 1)
+       end
+
+       # Update the locked_at timestamp to indicate that the producer is still
+       # working on those messages and to continue.
+       # @param topic [String]
+       # @param lock_id [String]
+       def heartbeat(topic, lock_id)
+         self.where(topic: topic, locked_by: lock_id).
+           update_all(locked_at: Time.zone.now)
+       end
+     end
+   end
+ end
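
The lock lifecycle, roughly as a DbProducer would drive it; the publishing step is a placeholder, not the actual producer code.

lock_id = SecureRandom.uuid
if Deimos::KafkaTopicInfo.lock('my-topic', lock_id)
  begin
    # ... send pending Deimos::KafkaMessage rows for 'my-topic' ...
    Deimos::KafkaTopicInfo.heartbeat('my-topic', lock_id)  # still working on this topic
    Deimos::KafkaTopicInfo.clear_lock('my-topic', lock_id) # nothing left to send
  rescue StandardError
    Deimos::KafkaTopicInfo.register_error('my-topic', lock_id) # back off for a minute
  end
end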