deimos-ruby 1.0.0.pre.beta22

Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +32 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +752 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos/active_record_consumer.rb +81 -0
  24. data/lib/deimos/active_record_producer.rb +64 -0
  25. data/lib/deimos/avro_data_coder.rb +89 -0
  26. data/lib/deimos/avro_data_decoder.rb +36 -0
  27. data/lib/deimos/avro_data_encoder.rb +51 -0
  28. data/lib/deimos/backends/db.rb +27 -0
  29. data/lib/deimos/backends/kafka.rb +27 -0
  30. data/lib/deimos/backends/kafka_async.rb +27 -0
  31. data/lib/deimos/configuration.rb +90 -0
  32. data/lib/deimos/consumer.rb +164 -0
  33. data/lib/deimos/instrumentation.rb +71 -0
  34. data/lib/deimos/kafka_message.rb +27 -0
  35. data/lib/deimos/kafka_source.rb +126 -0
  36. data/lib/deimos/kafka_topic_info.rb +86 -0
  37. data/lib/deimos/message.rb +74 -0
  38. data/lib/deimos/metrics/datadog.rb +47 -0
  39. data/lib/deimos/metrics/mock.rb +39 -0
  40. data/lib/deimos/metrics/provider.rb +38 -0
  41. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  42. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  43. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  44. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  45. data/lib/deimos/producer.rb +218 -0
  46. data/lib/deimos/publish_backend.rb +30 -0
  47. data/lib/deimos/railtie.rb +8 -0
  48. data/lib/deimos/schema_coercer.rb +108 -0
  49. data/lib/deimos/shared_config.rb +59 -0
  50. data/lib/deimos/test_helpers.rb +356 -0
  51. data/lib/deimos/tracing/datadog.rb +35 -0
  52. data/lib/deimos/tracing/mock.rb +40 -0
  53. data/lib/deimos/tracing/provider.rb +31 -0
  54. data/lib/deimos/utils/db_producer.rb +122 -0
  55. data/lib/deimos/utils/executor.rb +117 -0
  56. data/lib/deimos/utils/inline_consumer.rb +144 -0
  57. data/lib/deimos/utils/lag_reporter.rb +182 -0
  58. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  59. data/lib/deimos/utils/signal_handler.rb +68 -0
  60. data/lib/deimos/version.rb +5 -0
  61. data/lib/deimos.rb +133 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +27 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +120 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +259 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/lib/deimos/backends/kafka.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Default backend to produce to Kafka.
+    class Kafka < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
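Because the whole batch is published inside a Deimos.instrument('produce', ...) block, an application can observe every successful publish through the gem's ActiveSupport::Notifications wrapper. A minimal sketch of such a subscription (the log line is illustrative; the payload keys producer, topic and payloads come from the instrument call above):

    Deimos.subscribe('produce') do |event|
      # event.payload carries :producer, :topic and :payloads as passed to Deimos.instrument
      Deimos.config.logger.info(
        message: "Produced #{event.payload[:payloads].size} messages",
        topic: event.payload[:topic]
      )
    end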
data/lib/deimos/backends/kafka_async.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Backend which produces to Kafka via an async producer.
+    class KafkaAsync < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.async_publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
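The async variant differs from Deimos::Backends::Kafka only in calling async_publish_list, which hands the batch to Phobos' background async producer instead of blocking on delivery. Which backend runs is controlled by Configuration#publish_backend (defaulting to :kafka_async in the initializer shown below). A hedged sketch of switching it, assuming the gem's top-level Deimos.configure block from lib/deimos.rb:

    Deimos.configure do |config|
      config.publish_backend = :kafka         # block until delivery (Backends::Kafka)
      # config.publish_backend = :kafka_async # default; fire-and-forget (Backends::KafkaAsync)
    end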
data/lib/deimos/configuration.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Class to hold configuration.
+  class Configuration
+    # @return [Logger]
+    attr_accessor :logger
+    attr_accessor :phobos_logger
+    attr_accessor :kafka_logger
+
+    # By default, consumer errors will be consumed and logged to
+    # the metrics provider.
+    # Set this to true to force the error to be raised.
+    # @return [Boolean]
+    attr_accessor :reraise_consumer_errors
+
+    # @return [String]
+    attr_accessor :schema_registry_url
+
+    # @return [String]
+    attr_accessor :seed_broker
+
+    # Local path to schemas.
+    # @return [String]
+    attr_accessor :schema_path
+
+    # Default namespace for all producers. Can remain nil. Individual
+    # producers can override.
+    # @return [String]
+    attr_accessor :producer_schema_namespace
+
+    # Add a prefix to all topic names. This can be useful if you're using
+    # the same Kafka broker for different environments that are producing
+    # the same topics.
+    # @return [String]
+    attr_accessor :producer_topic_prefix
+
+    # Disable all actual message producing. Useful when doing things like
+    # mass imports or data space management when events don't need to be
+    # fired.
+    # @return [Boolean]
+    attr_accessor :disable_producers
+
+    # File path to the Phobos configuration file, relative to the application root.
+    # @return [String]
+    attr_accessor :phobos_config_file
+
+    # @return [Boolean]
+    attr_accessor :ssl_enabled
+
+    # @return [String]
+    attr_accessor :ssl_ca_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert_key
+
+    # Currently can be set to :db, :kafka, or :async_kafka. If using Kafka
+    # directly, set to async in your user-facing app, and sync in your
+    # consumers or delayed workers.
+    # @return [Symbol]
+    attr_accessor :publish_backend
+
+    # @return [Boolean]
+    attr_accessor :report_lag
+
+    # @return [Metrics::Provider]
+    attr_accessor :metrics
+
+    # @return [Tracing::Provider]
+    attr_accessor :tracer
+
+    # :nodoc:
+    def initialize
+      @phobos_config_file = 'config/phobos.yml'
+      @publish_backend = :kafka_async
+    end
+
+    # @param other_config [Configuration]
+    # @return [Boolean]
+    def phobos_config_changed?(other_config)
+      phobos_keys = %w(seed_broker phobos_config_file ssl_ca_cert ssl_client_cert ssl_client_cert_key)
+      return true if phobos_keys.any? { |key| self.send(key) != other_config.send(key) }
+
+      other_config.logger != self.logger
+    end
+  end
+end
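Aside from the two defaults set in the initializer, every setting is a plain attr_accessor, and phobos_config_changed? only compares the handful of fields that would require Phobos itself to be reconfigured. A small sketch of that check (the broker address is hypothetical):

    old_config = Deimos::Configuration.new
    new_config = Deimos::Configuration.new
    new_config.seed_broker = 'kafka.example.com:9092'
    old_config.phobos_config_changed?(new_config)               # => true, seed_broker is a Phobos-level key
    old_config.phobos_config_changed?(Deimos::Configuration.new) # => false, nothing relevant differs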
data/lib/deimos/consumer.rb
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+require 'deimos/avro_data_decoder'
+require 'deimos/shared_config'
+require 'phobos/handler'
+require 'active_support/all'
+require 'ddtrace'
+
+# Class to consume messages coming from the pipeline topic
+# Note: According to the docs, instances of your handler will be created
+# for every incoming message. This class should be lightweight.
+module Deimos
+  # Parent consumer class.
+  class Consumer
+    include Phobos::Handler
+    include SharedConfig
+
+    class << self
+      # @return [AvroDataEncoder]
+      def decoder
+        @decoder ||= AvroDataDecoder.new(schema: config[:schema],
+                                         namespace: config[:namespace])
+      end
+
+      # @return [AvroDataEncoder]
+      def key_decoder
+        @key_decoder ||= AvroDataDecoder.new(schema: config[:key_schema],
+                                             namespace: config[:namespace])
+      end
+    end
+
+    # :nodoc:
+    def around_consume(payload, metadata)
+      _received_message(payload, metadata)
+      benchmark = Benchmark.measure do
+        _with_error_span(payload, metadata) { yield }
+      end
+      _handle_success(benchmark.real, payload, metadata)
+    end
+
+    # :nodoc:
+    def before_consume(payload, metadata)
+      _with_error_span(payload, metadata) do
+        if self.class.config[:key_schema] || self.class.config[:key_field]
+          metadata[:key] = decode_key(metadata[:key])
+        end
+        self.class.decoder.decode(payload) if payload.present?
+      end
+    end
+
+    # Helper method to decode an Avro-encoded key.
+    # @param key [String]
+    # @return [Object] the decoded key.
+    def decode_key(key)
+      return nil if key.nil?
+
+      config = self.class.config
+      if config[:encode_key] && config[:key_field].nil? &&
+         config[:key_schema].nil?
+        raise 'No key config given - if you are not decoding keys, please use `key_config plain: true`'
+      end
+
+      if config[:key_field]
+        self.class.decoder.decode_key(key, config[:key_field])
+      elsif config[:key_schema]
+        self.class.key_decoder.decode(key, schema: config[:key_schema])
+      else # no encoding
+        key
+      end
+    end
+
+    # Consume incoming messages.
+    # @param _payload [String]
+    # @param _metadata [Hash]
+    def consume(_payload, _metadata)
+      raise NotImplementedError
+    end
+
+    private
+
+    # @param payload [Hash|String]
+    # @param metadata [Hash]
+    def _with_error_span(payload, metadata)
+      @span = Deimos.config.tracer&.start(
+        'deimos-consumer',
+        resource: self.class.name.gsub('::', '-')
+      )
+      yield
+    rescue StandardError => e
+      _handle_error(e, payload, metadata)
+    ensure
+      Deimos.config.tracer&.finish(@span)
+    end
+
+    def _received_message(payload, metadata)
+      Deimos.config.logger.info(
+        message: 'Got Kafka event',
+        payload: payload,
+        metadata: metadata
+      )
+      Deimos.config.metrics&.increment('handler', tags: %W(
+                                         status:received
+                                         topic:#{metadata[:topic]}
+                                       ))
+    end
+
+    # @param exception [Throwable]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_error(exception, payload, metadata)
+      Deimos.config.tracer&.set_error(@span, exception)
+      Deimos.config.metrics&.increment(
+        'handler',
+        tags: %W(
+          status:error
+          topic:#{metadata[:topic]}
+        )
+      )
+      Deimos.config.logger.warn(
+        message: 'Error consuming message',
+        handler: self.class.name,
+        metadata: metadata,
+        data: payload,
+        error_message: exception.message,
+        error: exception.backtrace
+      )
+      raise if Deimos.config.reraise_consumer_errors
+    end
+
+    # @param time_taken [Float]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_success(time_taken, payload, metadata)
+      Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
+                                         time:consume
+                                         topic:#{metadata[:topic]}
+                                       ))
+      Deimos.config.metrics&.increment('handler', tags: %W(
+                                         status:success
+                                         topic:#{metadata[:topic]}
+                                       ))
+      Deimos.config.logger.info(
+        message: 'Finished processing Kafka event',
+        payload: payload,
+        time_elapsed: time_taken,
+        metadata: metadata
+      )
+      return if payload.nil? || payload['timestamp'].blank?
+
+      begin
+        time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
+      rescue ArgumentError
+        Deimos.config.logger.info(
+          message: "Error parsing timestamp! #{payload['timestamp']}"
+        )
+        return
+      end
+      Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
+                                         time:time_delayed
+                                         topic:#{metadata[:topic]}
+                                       ))
+    end
+  end
+end
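In practice an application only implements consume; before_consume decodes the Avro payload and key, and around_consume adds the tracing span, timing and 'handler' metrics shown above. A hedged sketch of a subclass (MyConsumer, the schema name and the key field are hypothetical; the schema/namespace/key_config class macros come from Deimos::SharedConfig):

    class MyConsumer < Deimos::Consumer
      schema 'MySchema'
      namespace 'com.my-namespace'
      key_config field: 'test_id'

      # payload arrives already decoded into a hash; metadata[:key] is the decoded key
      def consume(payload, metadata)
        Deimos.config.logger.info(message: 'Handled event', key: metadata[:key], payload: payload)
      end
    end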
data/lib/deimos/instrumentation.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'active_support/notifications'
+require 'active_support/concern'
+
+# :nodoc:
+module Deimos
+  # Copied from Phobos instrumentation.
+  module Instrumentation
+    extend ActiveSupport::Concern
+    NAMESPACE = 'Deimos'
+
+    # :nodoc:
+    module ClassMethods
+      # :nodoc:
+      def subscribe(event)
+        ActiveSupport::Notifications.subscribe("#{NAMESPACE}.#{event}") do |*args|
+          yield(ActiveSupport::Notifications::Event.new(*args)) if block_given?
+        end
+      end
+
+      # :nodoc:
+      def unsubscribe(subscriber)
+        ActiveSupport::Notifications.unsubscribe(subscriber)
+      end
+
+      # :nodoc:
+      def instrument(event, extra={})
+        ActiveSupport::Notifications.instrument("#{NAMESPACE}.#{event}", extra) do |extra2|
+          yield(extra2) if block_given?
+        end
+      end
+    end
+  end
+
+  include Instrumentation
+
+  # This module listens to events published by RubyKafka.
+  module KafkaListener
+    # Listens for any exceptions that happen during publishing and re-publishes
+    # as a Deimos event.
+    # @param event [ActiveSupport::Notification]
+    def self.send_produce_error(event)
+      exception = event.payload[:exception_object]
+      return if !exception || !exception.respond_to?(:failed_messages)
+
+      messages = exception.failed_messages
+      messages.group_by(&:topic).each do |topic, batch|
+        next if batch.empty?
+
+        producer = batch.first.metadata[:producer_name]
+        payloads = batch.map { |m| m.metadata[:decoded_payload] }
+
+        Deimos.config.metrics&.count('publish_error', payloads.size,
                                     tags: %W(topic:#{topic}))
+        Deimos.instrument(
+          'produce_error',
+          producer: producer,
+          topic: topic,
+          exception_object: exception,
+          payloads: payloads
+        )
+      end
+    end
+  end
+
+  ActiveSupport::Notifications.subscribe('deliver_messages.producer.kafka') do |*args|
+    event = ActiveSupport::Notifications::Event.new(*args)
+    KafkaListener.send_produce_error(event)
+  end
+end
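The subscription at the bottom hooks ruby-kafka's deliver_messages notification, so failed async deliveries are surfaced as a Deimos 'produce_error' event carrying the decoded payloads. A minimal sketch of listening for those failures (the error handling is illustrative; the payload keys match the instrument call above):

    Deimos.subscribe('produce_error') do |event|
      Deimos.config.logger.error(
        message: "Failed to deliver #{event.payload[:payloads].size} messages",
        topic: event.payload[:topic],
        error: event.payload[:exception_object].message
      )
    end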
data/lib/deimos/kafka_message.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Store Kafka messages into the database.
+  class KafkaMessage < ActiveRecord::Base
+    self.table_name = 'kafka_messages'
+
+    validates_presence_of :message, :topic
+
+    # Ensure it gets turned into a string, e.g. for testing purposes. It
+    # should already be a string.
+    # @param mess [Object]
+    def message=(mess)
+      write_attribute(:message, mess.to_s)
+    end
+
+    # @return [Hash]
+    def phobos_message
+      {
+        payload: self.message,
+        partition_key: self.partition_key,
+        key: self.key,
+        topic: self.topic
+      }
+    end
+  end
+end
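These rows are what the :db publish backend writes and what Deimos::Utils::DbProducer later reads back, converting each row into a Phobos-ready hash via phobos_message. A hedged sketch of that round trip (the values are made up; the column names are inferred from phobos_message above and the db_backend migration template):

    record = Deimos::KafkaMessage.create!(
      topic: 'my-topic',
      message: '<avro-encoded bytes>',
      key: '123'
    )
    record.phobos_message
    # => { payload: '<avro-encoded bytes>', partition_key: nil, key: '123', topic: 'my-topic' }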
data/lib/deimos/kafka_source.rb
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Represents an object which needs to inform Kafka when it is saved or
+  # bulk imported.
+  module KafkaSource
+    extend ActiveSupport::Concern
+
+    included do
+      after_create(:send_kafka_event_on_create)
+      after_update(:send_kafka_event_on_update)
+      after_destroy(:send_kafka_event_on_destroy)
+    end
+
+    # Send the newly created model to Kafka.
+    def send_kafka_event_on_create
+      return unless self.persisted?
+      return unless self.class.kafka_config[:create]
+
+      self.class.kafka_producers.each { |p| p.send_event(self) }
+    end
+
+    # Send the newly updated model to Kafka.
+    def send_kafka_event_on_update
+      return unless self.class.kafka_config[:update]
+
+      producers = self.class.kafka_producers
+      fields = producers.flat_map(&:watched_attributes).uniq
+      fields -= ['updated_at']
+      # Only send an event if a field we care about was changed.
+      any_changes = fields.any? do |field|
+        field_change = self.previous_changes[field]
+        field_change.present? && field_change[0] != field_change[1]
+      end
+      return unless any_changes
+
+      producers.each { |p| p.send_event(self) }
+    end
+
+    # Send a deletion (null payload) event to Kafka.
+    def send_kafka_event_on_destroy
+      return unless self.class.kafka_config[:delete]
+
+      self.class.kafka_producers.each { |p| p.send_event(self.deletion_payload) }
+    end
+
+    # Payload to send after we are destroyed.
+    # @return [Hash]
+    def deletion_payload
+      { payload_key: self[self.class.primary_key] }
+    end
+
+    # :nodoc:
+    module ClassMethods
+      # @return [Hash]
+      def kafka_config
+        {
+          update: true,
+          delete: true,
+          import: true,
+          create: true
+        }
+      end
+
+      # @return [Array<Deimos::ActiveRecordProducer>] the producers to run.
+      def kafka_producers
+        raise NotImplementedError if self.method(:kafka_producer).
+          owner == Deimos::KafkaSource
+
+        [self.kafka_producer]
+      end
+
+      # Deprecated - use #kafka_producers instead.
+      # @return [Deimos::ActiveRecordProducer] the producer to use.
+      def kafka_producer
+        raise NotImplementedError if self.method(:kafka_producers).
+          owner == Deimos::KafkaSource
+
+        self.kafka_producers.first
+      end
+
+      # This is an internal method, part of the activerecord_import gem. It's
+      # the one that actually does the importing, having already normalized
+      # the inputs (arrays, hashes, records etc.)
+      # Basically we want to first do the import, then reload the records
+      # and send them to Kafka.
+      def import_without_validations_or_callbacks(column_names,
+                                                  array_of_attributes,
+                                                  options={})
+        results = super
+        return unless self.kafka_config[:import]
+        return if array_of_attributes.empty?
+
+        # This will contain an array of hashes, where each hash is the actual
+        # attribute hash that created the object.
+        ids =
+          ids = if results.is_a?(Array)
+                  results[1]
+                elsif results.respond_to?(:ids)
+                  results.ids
+                else
+                  []
+                end
+        if ids.blank?
+          # re-fill IDs based on what was just entered into the DB.
+          if self.connection.adapter_name.downcase =~ /sqlite/
+            last_id = self.connection.select_value('select last_insert_rowid()')
+            ids = ((last_id - array_of_attributes.size + 1)..last_id).to_a
+          else # mysql
+            last_id = self.connection.select_value('select LAST_INSERT_ID()')
+            ids = (last_id..(last_id + array_of_attributes.size)).to_a
+          end
+        end
+        array_of_hashes = []
+        array_of_attributes.each_with_index do |array, i|
+          hash = column_names.zip(array).to_h.with_indifferent_access
+          hash[self.primary_key] = ids[i] if hash[self.primary_key].blank?
+          array_of_hashes << hash
+        end
+
+        self.kafka_producers.each { |p| p.send_events(array_of_hashes) }
+        results
+      end
+    end
+  end
+end
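A model opts into this behaviour by including the concern and overriding kafka_producers (the deprecated singular kafka_producer is still supported); kafka_config can be overridden to switch off individual hooks. A hedged sketch (Widget and WidgetProducer are hypothetical names):

    class Widget < ActiveRecord::Base
      include Deimos::KafkaSource

      # Producers to fire on create/update/destroy and bulk import.
      def self.kafka_producers
        [WidgetProducer]
      end

      # Defaults to all four hooks being enabled; shown here only for illustration.
      def self.kafka_config
        { create: true, update: true, delete: true, import: true }
      end
    end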
data/lib/deimos/kafka_topic_info.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Record that keeps track of which topics are being worked on by DbProducers.
+  class KafkaTopicInfo < ActiveRecord::Base
+    self.table_name = 'kafka_topic_info'
+
+    class << self
+      # Lock a topic for the given ID. Returns whether the lock was successful.
+      # @param topic [String]
+      # @param lock_id [String]
+      # @return [Boolean]
+      def lock(topic, lock_id)
+        # Try to create it - it's fine if it already exists
+        begin
+          self.create(topic: topic)
+        rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/HandleExceptions
+          # continue on
+        end
+
+        # Lock the record
+        qtopic = self.connection.quote(topic)
+        qlock_id = self.connection.quote(lock_id)
+        qtable = self.connection.quote_table_name('kafka_topic_info')
+        qnow = self.connection.quote(Time.zone.now.to_s(:db))
+        qfalse = self.connection.quoted_false
+        qtime = self.connection.quote(1.minute.ago.to_s(:db))
+
+        # If a record is marked as error and less than 1 minute old,
+        # we don't want to pick it up even if not currently locked because
+        # we worry we'll run into the same problem again.
+        # Once it's more than 1 minute old, we figure it's OK to try again
+        # so we can pick up any topic that's that old, even if it was
+        # locked by someone, because it's the job of the producer to keep
+        # updating the locked_at timestamp as they work on messages in that
+        # topic. If the locked_at timestamp is that old, chances are that
+        # the producer crashed.
+        sql = <<~SQL
+          UPDATE #{qtable}
+          SET locked_by=#{qlock_id}, locked_at=#{qnow}, error=#{qfalse}
+          WHERE topic=#{qtopic} AND
+           ((locked_by IS NULL AND error=#{qfalse}) OR locked_at < #{qtime})
+        SQL
+        self.connection.update(sql)
+        self.where(locked_by: lock_id, topic: topic).any?
+      end
+
+      # This is called once a producer is finished working on a topic, i.e.
+      # there are no more messages to fetch. It unlocks the topic and
+      # moves on to the next one.
+      # @param topic [String]
+      # @param lock_id [String]
+      def clear_lock(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
+      end
+
+      # The producer calls this if it gets an error sending messages. This
+      # essentially locks down this topic for 1 minute (for all producers)
+      # and allows the caller to continue to the next topic.
+      # @param topic [String]
+      # @param lock_id [String]
+      def register_error(topic, lock_id)
+        record = self.where(topic: topic, locked_by: lock_id).last
+        attr_hash = { locked_by: nil,
+                      locked_at: Time.zone.now,
+                      error: true,
+                      retries: record.retries + 1 }
+        if Rails::VERSION::MAJOR >= 4
+          record.update!(attr_hash)
+        else
+          record.update_attributes!(attr_hash)
+        end
+      end
+
+      # Update the locked_at timestamp to indicate that the producer is still
+      # working on those messages and to continue.
+      # @param topic [String]
+      # @param lock_id [String]
+      def heartbeat(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_at: Time.zone.now)
+      end
    end
  end
end
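Taken together, lock, heartbeat, clear_lock and register_error form the per-topic locking protocol that Deimos::Utils::DbProducer follows. A hedged sketch of that flow (the topic and lock id are arbitrary, and the actual publishing step is elided):

    if Deimos::KafkaTopicInfo.lock('my-topic', 'producer-1')
      begin
        # ... send pending Deimos::KafkaMessage batches for this topic ...
        Deimos::KafkaTopicInfo.heartbeat('my-topic', 'producer-1')  # still working on the topic
        Deimos::KafkaTopicInfo.clear_lock('my-topic', 'producer-1') # nothing left to send
      rescue StandardError
        Deimos::KafkaTopicInfo.register_error('my-topic', 'producer-1')
      end
    end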
data/lib/deimos/message.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Basically a struct to hold the message as it's processed.
+  class Message
+    attr_accessor :payload, :key, :partition_key, :encoded_key,
+                  :encoded_payload, :topic, :producer_name
+
+    # @param payload [Hash]
+    # @param producer [Class]
+    def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
+      @payload = payload&.with_indifferent_access
+      @producer_name = producer.name
+      @topic = topic
+      @key = key
+      @partition_key = partition_key
+    end
+
+    # Add message_id and timestamp default values if they are in the
+    # schema and don't already have values.
+    # @param schema [Avro::Schema]
+    def add_fields(schema)
+      return if @payload.except(:payload_key, :partition_key).blank?
+
+      if schema.fields.any? { |f| f.name == 'message_id' }
+        @payload['message_id'] ||= SecureRandom.uuid
+      end
+      if schema.fields.any? { |f| f.name == 'timestamp' }
+        @payload['timestamp'] ||= Time.now.in_time_zone.to_s
+      end
+    end
+
+    # @param schema [Avro::Schema]
+    def coerce_fields(schema)
+      return if payload.nil?
+
+      @payload = SchemaCoercer.new(schema).coerce(@payload)
+    end
+
+    # @return [Hash]
+    def encoded_hash
+      {
+        topic: @topic,
+        key: @encoded_key,
+        partition_key: @partition_key || @encoded_key,
+        payload: @encoded_payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @return [Hash]
+    def to_h
+      {
+        topic: @topic,
+        key: @key,
+        partition_key: @partition_key || @key,
+        payload: @payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @param other [Message]
+    # @return [Boolean]
+    def ==(other)
+      self.to_h == other.to_h
+    end
+  end
+end
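Message is the value object producers build up: to_h keeps the decoded payload (handy in tests), while encoded_hash is what the Kafka backends hand to Phobos once encoded_key and encoded_payload have been filled in. A minimal sketch (MyProducer is hypothetical, and encoding is normally done by Deimos::Producer, so the encoded fields are set by hand here):

    message = Deimos::Message.new({ 'test_id' => 'abc' }, MyProducer, topic: 'my-topic', key: 'abc')
    message.encoded_key = 'abc'
    message.encoded_payload = '<avro-encoded bytes>'
    message.to_h[:payload]         # => { 'test_id' => 'abc' } (with indifferent access)
    message.encoded_hash[:payload] # => '<avro-encoded bytes>'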