deimos-ruby 1.0.0.pre.beta22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +32 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +752 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos/active_record_consumer.rb +81 -0
  24. data/lib/deimos/active_record_producer.rb +64 -0
  25. data/lib/deimos/avro_data_coder.rb +89 -0
  26. data/lib/deimos/avro_data_decoder.rb +36 -0
  27. data/lib/deimos/avro_data_encoder.rb +51 -0
  28. data/lib/deimos/backends/db.rb +27 -0
  29. data/lib/deimos/backends/kafka.rb +27 -0
  30. data/lib/deimos/backends/kafka_async.rb +27 -0
  31. data/lib/deimos/configuration.rb +90 -0
  32. data/lib/deimos/consumer.rb +164 -0
  33. data/lib/deimos/instrumentation.rb +71 -0
  34. data/lib/deimos/kafka_message.rb +27 -0
  35. data/lib/deimos/kafka_source.rb +126 -0
  36. data/lib/deimos/kafka_topic_info.rb +86 -0
  37. data/lib/deimos/message.rb +74 -0
  38. data/lib/deimos/metrics/datadog.rb +47 -0
  39. data/lib/deimos/metrics/mock.rb +39 -0
  40. data/lib/deimos/metrics/provider.rb +38 -0
  41. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  42. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  43. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  44. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  45. data/lib/deimos/producer.rb +218 -0
  46. data/lib/deimos/publish_backend.rb +30 -0
  47. data/lib/deimos/railtie.rb +8 -0
  48. data/lib/deimos/schema_coercer.rb +108 -0
  49. data/lib/deimos/shared_config.rb +59 -0
  50. data/lib/deimos/test_helpers.rb +356 -0
  51. data/lib/deimos/tracing/datadog.rb +35 -0
  52. data/lib/deimos/tracing/mock.rb +40 -0
  53. data/lib/deimos/tracing/provider.rb +31 -0
  54. data/lib/deimos/utils/db_producer.rb +122 -0
  55. data/lib/deimos/utils/executor.rb +117 -0
  56. data/lib/deimos/utils/inline_consumer.rb +144 -0
  57. data/lib/deimos/utils/lag_reporter.rb +182 -0
  58. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  59. data/lib/deimos/utils/signal_handler.rb +68 -0
  60. data/lib/deimos/version.rb +5 -0
  61. data/lib/deimos.rb +133 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +27 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +120 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +259 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/lib/deimos/backends/kafka.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Default backend to produce to Kafka.
+    class Kafka < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
data/lib/deimos/backends/kafka_async.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Backend which produces to Kafka via an async producer.
+    class KafkaAsync < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.async_publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
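
Both backends follow the same contract: subclass Deimos::PublishBackend and implement a class-level execute(producer_class:, messages:) method; the only difference between the two files above is publish_list versus async_publish_list. As a hedged sketch (the class name and log fields below are illustrative, not part of this release), a custom backend that only logs batches could look like:

    # Hypothetical example - not shipped with the gem. A custom backend only
    # needs to implement self.execute with the same keyword arguments.
    module Deimos
      module Backends
        class LoggingBackend < Deimos::PublishBackend
          # :nodoc:
          def self.execute(producer_class:, messages:)
            Deimos.config.logger.info(
              message: 'Would publish batch',
              topic: producer_class.topic,
              count: messages.size,
              payloads: messages.map(&:payload)
            )
          end
        end
      end
    end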
data/lib/deimos/configuration.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Class to hold configuration.
+  class Configuration
+    # @return [Logger]
+    attr_accessor :logger
+    attr_accessor :phobos_logger
+    attr_accessor :kafka_logger
+
+    # By default, consumer errors will be consumed and logged to
+    # the metrics provider.
+    # Set this to true to force the error to be raised.
+    # @return [Boolean]
+    attr_accessor :reraise_consumer_errors
+
+    # @return [String]
+    attr_accessor :schema_registry_url
+
+    # @return [String]
+    attr_accessor :seed_broker
+
+    # Local path to schemas.
+    # @return [String]
+    attr_accessor :schema_path
+
+    # Default namespace for all producers. Can remain nil. Individual
+    # producers can override.
+    # @return [String]
+    attr_accessor :producer_schema_namespace
+
+    # Add a prefix to all topic names. This can be useful if you're using
+    # the same Kafka broker for different environments that are producing
+    # the same topics.
+    # @return [String]
+    attr_accessor :producer_topic_prefix
+
+    # Disable all actual message producing. Useful when doing things like
+    # mass imports or data space management when events don't need to be
+    # fired.
+    # @return [Boolean]
+    attr_accessor :disable_producers
+
+    # File path to the Phobos configuration file, relative to the application root.
+    # @return [String]
+    attr_accessor :phobos_config_file
+
+    # @return [Boolean]
+    attr_accessor :ssl_enabled
+
+    # @return [String]
+    attr_accessor :ssl_ca_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert_key
+
+    # Currently can be set to :db, :kafka, or :async_kafka. If using Kafka
+    # directly, set to async in your user-facing app, and sync in your
+    # consumers or delayed workers.
+    # @return [Symbol]
+    attr_accessor :publish_backend
+
+    # @return [Boolean]
+    attr_accessor :report_lag
+
+    # @return [Metrics::Provider]
+    attr_accessor :metrics
+
+    # @return [Tracing::Provider]
+    attr_accessor :tracer
+
+    # :nodoc:
+    def initialize
+      @phobos_config_file = 'config/phobos.yml'
+      @publish_backend = :kafka_async
+    end
+
+    # @param other_config [Configuration]
+    # @return [Boolean]
+    def phobos_config_changed?(other_config)
+      phobos_keys = %w(seed_broker phobos_config_file ssl_ca_cert ssl_client_cert ssl_client_cert_key)
+      return true if phobos_keys.any? { |key| self.send(key) != other_config.send(key) }
+
+      other_config.logger != self.logger
+    end
+  end
+end
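
These options are plain attr_accessors with only two defaults (phobos_config_file and publish_backend), so the application is expected to set the rest at boot. A minimal sketch of a Rails initializer, assuming the usual Deimos.configure block exposed by lib/deimos.rb (all values below are placeholders):

    # config/initializers/deimos.rb - illustrative values only.
    Deimos.configure do |config|
      config.schema_registry_url = 'http://localhost:8081'  # placeholder URL
      config.seed_broker = 'localhost:9092'                  # placeholder broker
      config.schema_path = "#{Rails.root}/app/schemas"
      config.producer_schema_namespace = 'com.my-namespace'
      config.publish_backend = :kafka_async                  # or :kafka, :db
      config.reraise_consumer_errors = Rails.env.test?
    end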
data/lib/deimos/consumer.rb
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+require 'deimos/avro_data_decoder'
+require 'deimos/shared_config'
+require 'phobos/handler'
+require 'active_support/all'
+require 'ddtrace'
+
+# Class to consume messages coming from the pipeline topic
+# Note: According to the docs, instances of your handler will be created
+# for every incoming message. This class should be lightweight.
+module Deimos
+  # Parent consumer class.
+  class Consumer
+    include Phobos::Handler
+    include SharedConfig
+
+    class << self
+      # @return [AvroDataEncoder]
+      def decoder
+        @decoder ||= AvroDataDecoder.new(schema: config[:schema],
+                                         namespace: config[:namespace])
+      end
+
+      # @return [AvroDataEncoder]
+      def key_decoder
+        @key_decoder ||= AvroDataDecoder.new(schema: config[:key_schema],
+                                             namespace: config[:namespace])
+      end
+    end
+
+    # :nodoc:
+    def around_consume(payload, metadata)
+      _received_message(payload, metadata)
+      benchmark = Benchmark.measure do
+        _with_error_span(payload, metadata) { yield }
+      end
+      _handle_success(benchmark.real, payload, metadata)
+    end
+
+    # :nodoc:
+    def before_consume(payload, metadata)
+      _with_error_span(payload, metadata) do
+        if self.class.config[:key_schema] || self.class.config[:key_field]
+          metadata[:key] = decode_key(metadata[:key])
+        end
+        self.class.decoder.decode(payload) if payload.present?
+      end
+    end
+
+    # Helper method to decode an Avro-encoded key.
+    # @param key [String]
+    # @return [Object] the decoded key.
+    def decode_key(key)
+      return nil if key.nil?
+
+      config = self.class.config
+      if config[:encode_key] && config[:key_field].nil? &&
+         config[:key_schema].nil?
+        raise 'No key config given - if you are not decoding keys, please use `key_config plain: true`'
+      end
+
+      if config[:key_field]
+        self.class.decoder.decode_key(key, config[:key_field])
+      elsif config[:key_schema]
+        self.class.key_decoder.decode(key, schema: config[:key_schema])
+      else # no encoding
+        key
+      end
+    end
+
+    # Consume incoming messages.
+    # @param _payload [String]
+    # @param _metadata [Hash]
+    def consume(_payload, _metadata)
+      raise NotImplementedError
+    end
+
+    private
+
+    # @param payload [Hash|String]
+    # @param metadata [Hash]
+    def _with_error_span(payload, metadata)
+      @span = Deimos.config.tracer&.start(
+        'deimos-consumer',
+        resource: self.class.name.gsub('::', '-')
+      )
+      yield
+    rescue StandardError => e
+      _handle_error(e, payload, metadata)
+    ensure
+      Deimos.config.tracer&.finish(@span)
+    end
+
+    def _received_message(payload, metadata)
+      Deimos.config.logger.info(
+        message: 'Got Kafka event',
+        payload: payload,
+        metadata: metadata
+      )
+      Deimos.config.metrics&.increment('handler', tags: %W(
+        status:received
+        topic:#{metadata[:topic]}
+      ))
+    end
+
+    # @param exception [Throwable]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_error(exception, payload, metadata)
+      Deimos.config.tracer&.set_error(@span, exception)
+      Deimos.config.metrics&.increment(
+        'handler',
+        tags: %W(
+          status:error
+          topic:#{metadata[:topic]}
+        )
+      )
+      Deimos.config.logger.warn(
+        message: 'Error consuming message',
+        handler: self.class.name,
+        metadata: metadata,
+        data: payload,
+        error_message: exception.message,
+        error: exception.backtrace
+      )
+      raise if Deimos.config.reraise_consumer_errors
+    end
+
+    # @param time_taken [Float]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_success(time_taken, payload, metadata)
+      Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
+        time:consume
+        topic:#{metadata[:topic]}
+      ))
+      Deimos.config.metrics&.increment('handler', tags: %W(
+        status:success
+        topic:#{metadata[:topic]}
+      ))
+      Deimos.config.logger.info(
+        message: 'Finished processing Kafka event',
+        payload: payload,
+        time_elapsed: time_taken,
+        metadata: metadata
+      )
+      return if payload.nil? || payload['timestamp'].blank?
+
+      begin
+        time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
+      rescue ArgumentError
+        Deimos.config.logger.info(
+          message: "Error parsing timestamp! #{payload['timestamp']}"
+        )
+        return
+      end
+      Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
+        time:time_delayed
+        topic:#{metadata[:topic]}
+      ))
+    end
+  end
+end
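
Consumer is meant to be subclassed: consume raises NotImplementedError, and the class-level decoder/key_decoder read :schema, :namespace, :key_schema and :key_field from the config hash supplied by SharedConfig. A minimal subclass might look like the sketch below; the schema, namespace and key_config class macros are assumed to come from SharedConfig (not shown in this section), and the schema name mirrors the fixtures under spec/schemas:

    # Illustrative only - the class macros are assumed to be provided by SharedConfig.
    class MyConsumer < Deimos::Consumer
      schema 'MySchema'
      namespace 'com.my-namespace'
      key_config field: :test_id

      # @param payload [Hash] the decoded Avro payload.
      # @param metadata [Hash] includes :topic and the decoded :key.
      def consume(payload, metadata)
        Deimos.config.logger.info(message: 'Handled', key: metadata[:key], payload: payload)
      end
    end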
data/lib/deimos/instrumentation.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'active_support/notifications'
+require 'active_support/concern'
+
+# :nodoc:
+module Deimos
+  # Copied from Phobos instrumentation.
+  module Instrumentation
+    extend ActiveSupport::Concern
+    NAMESPACE = 'Deimos'
+
+    # :nodoc:
+    module ClassMethods
+      # :nodoc:
+      def subscribe(event)
+        ActiveSupport::Notifications.subscribe("#{NAMESPACE}.#{event}") do |*args|
+          yield(ActiveSupport::Notifications::Event.new(*args)) if block_given?
+        end
+      end
+
+      # :nodoc:
+      def unsubscribe(subscriber)
+        ActiveSupport::Notifications.unsubscribe(subscriber)
+      end
+
+      # :nodoc:
+      def instrument(event, extra={})
+        ActiveSupport::Notifications.instrument("#{NAMESPACE}.#{event}", extra) do |extra2|
+          yield(extra2) if block_given?
+        end
+      end
+    end
+  end
+
+  include Instrumentation
+
+  # This module listens to events published by RubyKafka.
+  module KafkaListener
+    # Listens for any exceptions that happen during publishing and re-publishes
+    # as a Deimos event.
+    # @param event [ActiveSupport::Notification]
+    def self.send_produce_error(event)
+      exception = event.payload[:exception_object]
+      return if !exception || !exception.respond_to?(:failed_messages)
+
+      messages = exception.failed_messages
+      messages.group_by(&:topic).each do |topic, batch|
+        next if batch.empty?
+
+        producer = batch.first.metadata[:producer_name]
+        payloads = batch.map { |m| m.metadata[:decoded_payload] }
+
+        Deimos.config.metrics&.count('publish_error', payloads.size,
+                                     tags: %W(topic:#{topic}))
+        Deimos.instrument(
+          'produce_error',
+          producer: producer,
+          topic: topic,
+          exception_object: exception,
+          payloads: payloads
+        )
+      end
+    end
+  end
+
+  ActiveSupport::Notifications.subscribe('deliver_messages.producer.kafka') do |*args|
+    event = ActiveSupport::Notifications::Event.new(*args)
+    KafkaListener.send_produce_error(event)
+  end
+end
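
Deimos.instrument and Deimos.subscribe are thin wrappers around ActiveSupport::Notifications under the 'Deimos' namespace, and the RubyKafka listener re-emits async delivery failures as a 'produce_error' Deimos event. A hedged sketch of an application-side subscriber (the handler body is illustrative; the event names and payload keys come from this file):

    # Illustrative subscriber for async delivery failures.
    Deimos.subscribe('produce_error') do |event|
      Deimos.config.logger.error(
        message: 'Failed to deliver messages',
        topic: event.payload[:topic],
        producer: event.payload[:producer],
        error: event.payload[:exception_object]&.message,
        count: event.payload[:payloads]&.size
      )
    end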
data/lib/deimos/kafka_message.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Store Kafka messages into the database.
+  class KafkaMessage < ActiveRecord::Base
+    self.table_name = 'kafka_messages'
+
+    validates_presence_of :message, :topic
+
+    # Ensure it gets turned into a string, e.g. for testing purposes. It
+    # should already be a string.
+    # @param mess [Object]
+    def message=(mess)
+      write_attribute(:message, mess.to_s)
+    end
+
+    # @return [Hash]
+    def phobos_message
+      {
+        payload: self.message,
+        partition_key: self.partition_key,
+        key: self.key,
+        topic: self.topic
+      }
+    end
+  end
+end
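
KafkaMessage is the ActiveRecord model behind the :db publish backend: rows are written by Deimos::Backends::Db and later drained by the DB producer, which turns each row into a Phobos-compatible hash via phobos_message. A small illustrative round trip, assuming the kafka_messages table from the generator migration (column values are placeholders):

    # Illustrative only.
    record = Deimos::KafkaMessage.create!(
      topic: 'my-topic',
      key: '123',
      partition_key: '123',
      message: { 'test_id' => 'abc' }.to_json  # message= coerces to a string regardless
    )
    record.phobos_message
    # => { payload: "...", partition_key: "123", key: "123", topic: "my-topic" }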
data/lib/deimos/kafka_source.rb
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Represents an object which needs to inform Kafka when it is saved or
+  # bulk imported.
+  module KafkaSource
+    extend ActiveSupport::Concern
+
+    included do
+      after_create(:send_kafka_event_on_create)
+      after_update(:send_kafka_event_on_update)
+      after_destroy(:send_kafka_event_on_destroy)
+    end
+
+    # Send the newly created model to Kafka.
+    def send_kafka_event_on_create
+      return unless self.persisted?
+      return unless self.class.kafka_config[:create]
+
+      self.class.kafka_producers.each { |p| p.send_event(self) }
+    end
+
+    # Send the newly updated model to Kafka.
+    def send_kafka_event_on_update
+      return unless self.class.kafka_config[:update]
+
+      producers = self.class.kafka_producers
+      fields = producers.flat_map(&:watched_attributes).uniq
+      fields -= ['updated_at']
+      # Only send an event if a field we care about was changed.
+      any_changes = fields.any? do |field|
+        field_change = self.previous_changes[field]
+        field_change.present? && field_change[0] != field_change[1]
+      end
+      return unless any_changes
+
+      producers.each { |p| p.send_event(self) }
+    end
+
+    # Send a deletion (null payload) event to Kafka.
+    def send_kafka_event_on_destroy
+      return unless self.class.kafka_config[:delete]
+
+      self.class.kafka_producers.each { |p| p.send_event(self.deletion_payload) }
+    end
+
+    # Payload to send after we are destroyed.
+    # @return [Hash]
+    def deletion_payload
+      { payload_key: self[self.class.primary_key] }
+    end
+
+    # :nodoc:
+    module ClassMethods
+      # @return [Hash]
+      def kafka_config
+        {
+          update: true,
+          delete: true,
+          import: true,
+          create: true
+        }
+      end
+
+      # @return [Array<Deimos::ActiveRecordProducer>] the producers to run.
+      def kafka_producers
+        raise NotImplementedError if self.method(:kafka_producer).
+                                     owner == Deimos::KafkaSource
+
+        [self.kafka_producer]
+      end
+
+      # Deprecated - use #kafka_producers instead.
+      # @return [Deimos::ActiveRecordProducer] the producer to use.
+      def kafka_producer
+        raise NotImplementedError if self.method(:kafka_producers).
+                                     owner == Deimos::KafkaSource
+
+        self.kafka_producers.first
+      end
+
+      # This is an internal method, part of the activerecord_import gem. It's
+      # the one that actually does the importing, having already normalized
+      # the inputs (arrays, hashes, records etc.)
+      # Basically we want to first do the import, then reload the records
+      # and send them to Kafka.
+      def import_without_validations_or_callbacks(column_names,
+                                                  array_of_attributes,
+                                                  options={})
+        results = super
+        return unless self.kafka_config[:import]
+        return if array_of_attributes.empty?
+
+        # This will contain an array of hashes, where each hash is the actual
+        # attribute hash that created the object.
+        ids =
+          ids = if results.is_a?(Array)
+                  results[1]
+                elsif results.respond_to?(:ids)
+                  results.ids
+                else
+                  []
+                end
+        if ids.blank?
+          # re-fill IDs based on what was just entered into the DB.
+          if self.connection.adapter_name.downcase =~ /sqlite/
+            last_id = self.connection.select_value('select last_insert_rowid()')
+            ids = ((last_id - array_of_attributes.size + 1)..last_id).to_a
+          else # mysql
+            last_id = self.connection.select_value('select LAST_INSERT_ID()')
+            ids = (last_id..(last_id + array_of_attributes.size)).to_a
+          end
+        end
+        array_of_hashes = []
+        array_of_attributes.each_with_index do |array, i|
+          hash = column_names.zip(array).to_h.with_indifferent_access
+          hash[self.primary_key] = ids[i] if hash[self.primary_key].blank?
+          array_of_hashes << hash
+        end
+
+        self.kafka_producers.each { |p| p.send_events(array_of_hashes) }
+        results
+      end
+    end
+  end
+end
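
KafkaSource wires ActiveRecord callbacks (and activerecord_import's bulk path) to one or more ActiveRecordProducers; an including model is expected to override kafka_producers and may narrow kafka_config to turn off individual events. A hedged sketch of such a model (WidgetProducer is a hypothetical Deimos::ActiveRecordProducer subclass):

    # Illustrative only.
    class Widget < ActiveRecord::Base
      include Deimos::KafkaSource

      # Disable deletion events but keep create/update/import.
      def self.kafka_config
        super.merge(delete: false)
      end

      # WidgetProducer is assumed to be a Deimos::ActiveRecordProducer subclass.
      def self.kafka_producers
        [WidgetProducer]
      end
    end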
data/lib/deimos/kafka_topic_info.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Record that keeps track of which topics are being worked on by DbProducers.
+  class KafkaTopicInfo < ActiveRecord::Base
+    self.table_name = 'kafka_topic_info'
+
+    class << self
+      # Lock a topic for the given ID. Returns whether the lock was successful.
+      # @param topic [String]
+      # @param lock_id [String]
+      # @return [Boolean]
+      def lock(topic, lock_id)
+        # Try to create it - it's fine if it already exists
+        begin
+          self.create(topic: topic)
+        rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/HandleExceptions
+          # continue on
+        end
+
+        # Lock the record
+        qtopic = self.connection.quote(topic)
+        qlock_id = self.connection.quote(lock_id)
+        qtable = self.connection.quote_table_name('kafka_topic_info')
+        qnow = self.connection.quote(Time.zone.now.to_s(:db))
+        qfalse = self.connection.quoted_false
+        qtime = self.connection.quote(1.minute.ago.to_s(:db))
+
+        # If a record is marked as error and less than 1 minute old,
+        # we don't want to pick it up even if not currently locked because
+        # we worry we'll run into the same problem again.
+        # Once it's more than 1 minute old, we figure it's OK to try again
+        # so we can pick up any topic that's that old, even if it was
+        # locked by someone, because it's the job of the producer to keep
+        # updating the locked_at timestamp as they work on messages in that
+        # topic. If the locked_at timestamp is that old, chances are that
+        # the producer crashed.
+        sql = <<~SQL
+          UPDATE #{qtable}
+          SET locked_by=#{qlock_id}, locked_at=#{qnow}, error=#{qfalse}
+          WHERE topic=#{qtopic} AND
+           ((locked_by IS NULL AND error=#{qfalse}) OR locked_at < #{qtime})
+        SQL
+        self.connection.update(sql)
+        self.where(locked_by: lock_id, topic: topic).any?
+      end
+
+      # This is called once a producer is finished working on a topic, i.e.
+      # there are no more messages to fetch. It unlocks the topic and
+      # moves on to the next one.
+      # @param topic [String]
+      # @param lock_id [String]
+      def clear_lock(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
+      end
+
+      # The producer calls this if it gets an error sending messages. This
+      # essentially locks down this topic for 1 minute (for all producers)
+      # and allows the caller to continue to the next topic.
+      # @param topic [String]
+      # @param lock_id [String]
+      def register_error(topic, lock_id)
+        record = self.where(topic: topic, locked_by: lock_id).last
+        attr_hash = { locked_by: nil,
+                      locked_at: Time.zone.now,
+                      error: true,
+                      retries: record.retries + 1 }
+        if Rails::VERSION::MAJOR >= 4
+          record.update!(attr_hash)
+        else
+          record.update_attributes!(attr_hash)
+        end
+      end
+
+      # Update the locked_at timestamp to indicate that the producer is still
+      # working on those messages and to continue.
+      # @param topic [String]
+      # @param lock_id [String]
+      def heartbeat(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_at: Time.zone.now)
+      end
+    end
+  end
+end
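
KafkaTopicInfo is the cooperative lock that lets several DB producer processes share topics: lock claims a topic (or steals one whose locked_at is over a minute stale), heartbeat keeps the claim fresh, and clear_lock or register_error releases it on success or failure. A hedged sketch of that cycle from a worker's point of view (fetch_messages and publish are placeholders, not methods from this gem):

    # Illustrative worker loop only.
    lock_id = SecureRandom.uuid
    topic = 'my-topic'
    if Deimos::KafkaTopicInfo.lock(topic, lock_id)
      begin
        while (batch = fetch_messages(topic)).any?
          publish(batch)
          Deimos::KafkaTopicInfo.heartbeat(topic, lock_id)  # keep locked_at fresh
        end
        Deimos::KafkaTopicInfo.clear_lock(topic, lock_id)
      rescue StandardError
        Deimos::KafkaTopicInfo.register_error(topic, lock_id)  # backs the topic off ~1 minute
      end
    end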
data/lib/deimos/message.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Basically a struct to hold the message as it's processed.
+  class Message
+    attr_accessor :payload, :key, :partition_key, :encoded_key,
+                  :encoded_payload, :topic, :producer_name
+
+    # @param payload [Hash]
+    # @param producer [Class]
+    def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
+      @payload = payload&.with_indifferent_access
+      @producer_name = producer.name
+      @topic = topic
+      @key = key
+      @partition_key = partition_key
+    end
+
+    # Add message_id and timestamp default values if they are in the
+    # schema and don't already have values.
+    # @param schema [Avro::Schema]
+    def add_fields(schema)
+      return if @payload.except(:payload_key, :partition_key).blank?
+
+      if schema.fields.any? { |f| f.name == 'message_id' }
+        @payload['message_id'] ||= SecureRandom.uuid
+      end
+      if schema.fields.any? { |f| f.name == 'timestamp' }
+        @payload['timestamp'] ||= Time.now.in_time_zone.to_s
+      end
+    end
+
+    # @param schema [Avro::Schema]
+    def coerce_fields(schema)
+      return if payload.nil?
+
+      @payload = SchemaCoercer.new(schema).coerce(@payload)
+    end
+
+    # @return [Hash]
+    def encoded_hash
+      {
+        topic: @topic,
+        key: @encoded_key,
+        partition_key: @partition_key || @encoded_key,
+        payload: @encoded_payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @return [Hash]
+    def to_h
+      {
+        topic: @topic,
+        key: @key,
+        partition_key: @partition_key || @key,
+        payload: @payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @param other [Message]
+    # @return [Boolean]
+    def ==(other)
+      self.to_h == other.to_h
+    end
+  end
+end
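
Message carries both the raw and encoded forms of a payload: the producer fills payload and key on initialize, add_fields and coerce_fields massage the payload against the Avro schema, and encoded_key/encoded_payload are set before encoded_hash hands the result to a backend. A small illustrative sequence (MyProducer, schema and encoder are placeholders standing in for what Deimos::Producer does internally):

    # Illustrative only - mirrors the steps a producer takes when building messages.
    message = Deimos::Message.new({ 'test_id' => 'abc' }, MyProducer,
                                  topic: 'my-topic', key: 'abc')
    message.add_fields(schema)     # fill message_id/timestamp if the schema defines them
    message.coerce_fields(schema)  # coerce payload types to match the Avro schema
    message.encoded_key = 'abc'                # normally Avro-encoded by the producer
    message.encoded_payload = encoder.encode(message.payload)  # encoder is a placeholder
    backend_hash = message.encoded_hash        # what the Kafka backends publish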