deimos-ruby 1.0.0.pre.beta22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +74 -0
- data/.gitignore +41 -0
- data/.gitmodules +0 -0
- data/.rspec +1 -0
- data/.rubocop.yml +321 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +32 -0
- data/CODE_OF_CONDUCT.md +77 -0
- data/Dockerfile +23 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +165 -0
- data/Guardfile +22 -0
- data/LICENSE.md +195 -0
- data/README.md +752 -0
- data/Rakefile +13 -0
- data/bin/deimos +4 -0
- data/deimos-kafka.gemspec +42 -0
- data/docker-compose.yml +71 -0
- data/docs/DATABASE_BACKEND.md +147 -0
- data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
- data/lib/deimos/active_record_consumer.rb +81 -0
- data/lib/deimos/active_record_producer.rb +64 -0
- data/lib/deimos/avro_data_coder.rb +89 -0
- data/lib/deimos/avro_data_decoder.rb +36 -0
- data/lib/deimos/avro_data_encoder.rb +51 -0
- data/lib/deimos/backends/db.rb +27 -0
- data/lib/deimos/backends/kafka.rb +27 -0
- data/lib/deimos/backends/kafka_async.rb +27 -0
- data/lib/deimos/configuration.rb +90 -0
- data/lib/deimos/consumer.rb +164 -0
- data/lib/deimos/instrumentation.rb +71 -0
- data/lib/deimos/kafka_message.rb +27 -0
- data/lib/deimos/kafka_source.rb +126 -0
- data/lib/deimos/kafka_topic_info.rb +86 -0
- data/lib/deimos/message.rb +74 -0
- data/lib/deimos/metrics/datadog.rb +47 -0
- data/lib/deimos/metrics/mock.rb +39 -0
- data/lib/deimos/metrics/provider.rb +38 -0
- data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
- data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
- data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
- data/lib/deimos/monkey_patches/schema_store.rb +19 -0
- data/lib/deimos/producer.rb +218 -0
- data/lib/deimos/publish_backend.rb +30 -0
- data/lib/deimos/railtie.rb +8 -0
- data/lib/deimos/schema_coercer.rb +108 -0
- data/lib/deimos/shared_config.rb +59 -0
- data/lib/deimos/test_helpers.rb +356 -0
- data/lib/deimos/tracing/datadog.rb +35 -0
- data/lib/deimos/tracing/mock.rb +40 -0
- data/lib/deimos/tracing/provider.rb +31 -0
- data/lib/deimos/utils/db_producer.rb +122 -0
- data/lib/deimos/utils/executor.rb +117 -0
- data/lib/deimos/utils/inline_consumer.rb +144 -0
- data/lib/deimos/utils/lag_reporter.rb +182 -0
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +68 -0
- data/lib/deimos/version.rb +5 -0
- data/lib/deimos.rb +133 -0
- data/lib/generators/deimos/db_backend/templates/migration +24 -0
- data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
- data/lib/generators/deimos/db_backend_generator.rb +48 -0
- data/lib/tasks/deimos.rake +27 -0
- data/spec/active_record_consumer_spec.rb +81 -0
- data/spec/active_record_producer_spec.rb +107 -0
- data/spec/avro_data_decoder_spec.rb +18 -0
- data/spec/avro_data_encoder_spec.rb +37 -0
- data/spec/backends/db_spec.rb +35 -0
- data/spec/backends/kafka_async_spec.rb +11 -0
- data/spec/backends/kafka_spec.rb +11 -0
- data/spec/consumer_spec.rb +169 -0
- data/spec/deimos_spec.rb +120 -0
- data/spec/kafka_source_spec.rb +168 -0
- data/spec/kafka_topic_info_spec.rb +88 -0
- data/spec/phobos.bad_db.yml +73 -0
- data/spec/phobos.yml +73 -0
- data/spec/producer_spec.rb +397 -0
- data/spec/publish_backend_spec.rb +10 -0
- data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
- data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
- data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
- data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
- data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
- data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
- data/spec/spec_helper.rb +207 -0
- data/spec/updateable_schema_store_spec.rb +36 -0
- data/spec/utils/db_producer_spec.rb +259 -0
- data/spec/utils/executor_spec.rb +42 -0
- data/spec/utils/lag_reporter_spec.rb +69 -0
- data/spec/utils/platform_schema_validation_spec.rb +0 -0
- data/spec/utils/signal_handler_spec.rb +16 -0
- data/support/deimos-solo.png +0 -0
- data/support/deimos-with-name-next.png +0 -0
- data/support/deimos-with-name.png +0 -0
- data/support/flipp-logo.png +0 -0
- metadata +452 -0
data/lib/deimos/backends/kafka.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Default backend to produce to Kafka.
+    class Kafka < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
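The synchronous backend above is handed a producer class and a batch of already-encoded `Deimos::Message` objects and publishes them inline. A minimal sketch of that entry point, assuming a hypothetical `MyProducer` class (the gem's `Deimos::Producer` normally builds this list and dispatches to whichever backend is configured):

```ruby
# Hypothetical illustration only: MyProducer stands in for a real producer class.
messages = [
  Deimos::Message.new({ 'test_id' => 'abc' }, MyProducer, topic: MyProducer.topic)
]
Deimos::Backends::Kafka.execute(producer_class: MyProducer, messages: messages)
```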
data/lib/deimos/backends/kafka_async.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Backend which produces to Kafka via an async producer.
+    class KafkaAsync < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.async_publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
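The async variant differs only in calling `async_publish_list`, so messages go through Phobos' buffered async producer instead of being delivered inline. Selecting between the two happens via the `publish_backend` setting on the configuration class later in this diff; a sketch:

```ruby
# Assumed usage: Deimos.config is the Configuration instance referenced throughout this diff.
Deimos.config.publish_backend = :kafka_async # buffered; suited to user-facing requests
# Deimos.config.publish_backend = :kafka     # synchronous; suited to workers and consumers
```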
data/lib/deimos/configuration.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Class to hold configuration.
+  class Configuration
+    # @return [Logger]
+    attr_accessor :logger
+    attr_accessor :phobos_logger
+    attr_accessor :kafka_logger
+
+    # By default, consumer errors will be consumed and logged to
+    # the metrics provider.
+    # Set this to true to force the error to be raised.
+    # @return [Boolean]
+    attr_accessor :reraise_consumer_errors
+
+    # @return [String]
+    attr_accessor :schema_registry_url
+
+    # @return [String]
+    attr_accessor :seed_broker
+
+    # Local path to schemas.
+    # @return [String]
+    attr_accessor :schema_path
+
+    # Default namespace for all producers. Can remain nil. Individual
+    # producers can override.
+    # @return [String]
+    attr_accessor :producer_schema_namespace
+
+    # Add a prefix to all topic names. This can be useful if you're using
+    # the same Kafka broker for different environments that are producing
+    # the same topics.
+    # @return [String]
+    attr_accessor :producer_topic_prefix
+
+    # Disable all actual message producing. Useful when doing things like
+    # mass imports or data space management when events don't need to be
+    # fired.
+    # @return [Boolean]
+    attr_accessor :disable_producers
+
+    # File path to the Phobos configuration file, relative to the application root.
+    # @return [String]
+    attr_accessor :phobos_config_file
+
+    # @return [Boolean]
+    attr_accessor :ssl_enabled
+
+    # @return [String]
+    attr_accessor :ssl_ca_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert_key
+
+    # Currently can be set to :db, :kafka, or :async_kafka. If using Kafka
+    # directly, set to async in your user-facing app, and sync in your
+    # consumers or delayed workers.
+    # @return [Symbol]
+    attr_accessor :publish_backend
+
+    # @return [Boolean]
+    attr_accessor :report_lag
+
+    # @return [Metrics::Provider]
+    attr_accessor :metrics
+
+    # @return [Tracing::Provider]
+    attr_accessor :tracer
+
+    # :nodoc:
+    def initialize
+      @phobos_config_file = 'config/phobos.yml'
+      @publish_backend = :kafka_async
+    end
+
+    # @param other_config [Configuration]
+    # @return [Boolean]
+    def phobos_config_changed?(other_config)
+      phobos_keys = %w(seed_broker phobos_config_file ssl_ca_cert ssl_client_cert ssl_client_cert_key)
+      return true if phobos_keys.any? { |key| self.send(key) != other_config.send(key) }
+
+      other_config.logger != self.logger
+    end
+  end
+end
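Every `Deimos.config.*` call in this diff reads one of these accessors. A minimal setup sketch, assuming the `Deimos.configure` block documented in the gem's README (all values are placeholders):

```ruby
Deimos.configure do |config|
  config.schema_registry_url = 'http://localhost:8081' # placeholder
  config.seed_broker = 'localhost:9092'                # placeholder
  config.schema_path = 'app/schemas'
  config.producer_schema_namespace = 'com.my-namespace'
  config.publish_backend = :kafka                      # initializer defaults to :kafka_async
  config.reraise_consumer_errors = true
  config.metrics = Deimos::Metrics::Mock.new           # mock providers shipped in this release
  config.tracer = Deimos::Tracing::Mock.new
end
```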
data/lib/deimos/consumer.rb
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+require 'deimos/avro_data_decoder'
+require 'deimos/shared_config'
+require 'phobos/handler'
+require 'active_support/all'
+require 'ddtrace'
+
+# Class to consume messages coming from the pipeline topic
+# Note: According to the docs, instances of your handler will be created
+# for every incoming message. This class should be lightweight.
+module Deimos
+  # Parent consumer class.
+  class Consumer
+    include Phobos::Handler
+    include SharedConfig
+
+    class << self
+      # @return [AvroDataEncoder]
+      def decoder
+        @decoder ||= AvroDataDecoder.new(schema: config[:schema],
+                                         namespace: config[:namespace])
+      end
+
+      # @return [AvroDataEncoder]
+      def key_decoder
+        @key_decoder ||= AvroDataDecoder.new(schema: config[:key_schema],
+                                             namespace: config[:namespace])
+      end
+    end
+
+    # :nodoc:
+    def around_consume(payload, metadata)
+      _received_message(payload, metadata)
+      benchmark = Benchmark.measure do
+        _with_error_span(payload, metadata) { yield }
+      end
+      _handle_success(benchmark.real, payload, metadata)
+    end
+
+    # :nodoc:
+    def before_consume(payload, metadata)
+      _with_error_span(payload, metadata) do
+        if self.class.config[:key_schema] || self.class.config[:key_field]
+          metadata[:key] = decode_key(metadata[:key])
+        end
+        self.class.decoder.decode(payload) if payload.present?
+      end
+    end
+
+    # Helper method to decode an Avro-encoded key.
+    # @param key [String]
+    # @return [Object] the decoded key.
+    def decode_key(key)
+      return nil if key.nil?
+
+      config = self.class.config
+      if config[:encode_key] && config[:key_field].nil? &&
+         config[:key_schema].nil?
+        raise 'No key config given - if you are not decoding keys, please use `key_config plain: true`'
+      end
+
+      if config[:key_field]
+        self.class.decoder.decode_key(key, config[:key_field])
+      elsif config[:key_schema]
+        self.class.key_decoder.decode(key, schema: config[:key_schema])
+      else # no encoding
+        key
+      end
+    end
+
+    # Consume incoming messages.
+    # @param _payload [String]
+    # @param _metadata [Hash]
+    def consume(_payload, _metadata)
+      raise NotImplementedError
+    end
+
+    private
+
+    # @param payload [Hash|String]
+    # @param metadata [Hash]
+    def _with_error_span(payload, metadata)
+      @span = Deimos.config.tracer&.start(
+        'deimos-consumer',
+        resource: self.class.name.gsub('::', '-')
+      )
+      yield
+    rescue StandardError => e
+      _handle_error(e, payload, metadata)
+    ensure
+      Deimos.config.tracer&.finish(@span)
+    end
+
+    def _received_message(payload, metadata)
+      Deimos.config.logger.info(
+        message: 'Got Kafka event',
+        payload: payload,
+        metadata: metadata
+      )
+      Deimos.config.metrics&.increment('handler', tags: %W(
+        status:received
+        topic:#{metadata[:topic]}
+      ))
+    end
+
+    # @param exception [Throwable]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_error(exception, payload, metadata)
+      Deimos.config.tracer&.set_error(@span, exception)
+      Deimos.config.metrics&.increment(
+        'handler',
+        tags: %W(
+          status:error
+          topic:#{metadata[:topic]}
+        )
+      )
+      Deimos.config.logger.warn(
+        message: 'Error consuming message',
+        handler: self.class.name,
+        metadata: metadata,
+        data: payload,
+        error_message: exception.message,
+        error: exception.backtrace
+      )
+      raise if Deimos.config.reraise_consumer_errors
+    end
+
+    # @param time_taken [Float]
+    # @param payload [Hash]
+    # @param metadata [Hash]
+    def _handle_success(time_taken, payload, metadata)
+      Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
+        time:consume
+        topic:#{metadata[:topic]}
+      ))
+      Deimos.config.metrics&.increment('handler', tags: %W(
+        status:success
+        topic:#{metadata[:topic]}
+      ))
+      Deimos.config.logger.info(
+        message: 'Finished processing Kafka event',
+        payload: payload,
+        time_elapsed: time_taken,
+        metadata: metadata
+      )
+      return if payload.nil? || payload['timestamp'].blank?
+
+      begin
+        time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
+      rescue ArgumentError
+        Deimos.config.logger.info(
+          message: "Error parsing timestamp! #{payload['timestamp']}"
+        )
+        return
+      end
+      Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
+        time:time_delayed
+        topic:#{metadata[:topic]}
+      ))
+    end
+  end
+end
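A concrete consumer subclasses `Deimos::Consumer` and implements `consume`; the `config[:schema]`, `config[:namespace]`, and key settings it reads come from the `SharedConfig` class macros. A sketch, assuming the class-level `schema`/`namespace`/`key_config` DSL and the spec schema fixtures listed above:

```ruby
class MyConsumer < Deimos::Consumer
  # Assumed SharedConfig DSL; `key_config plain: true` is the option named in the
  # error message above and skips key decoding entirely.
  schema 'MySchema'
  namespace 'com.my-namespace'
  key_config plain: true

  # payload arrives already Avro-decoded by before_consume
  def consume(payload, metadata)
    Deimos.config.logger.info(message: 'Handled message', key: metadata[:key], payload: payload)
  end
end
```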
data/lib/deimos/instrumentation.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'active_support/notifications'
+require 'active_support/concern'
+
+# :nodoc:
+module Deimos
+  # Copied from Phobos instrumentation.
+  module Instrumentation
+    extend ActiveSupport::Concern
+    NAMESPACE = 'Deimos'
+
+    # :nodoc:
+    module ClassMethods
+      # :nodoc:
+      def subscribe(event)
+        ActiveSupport::Notifications.subscribe("#{NAMESPACE}.#{event}") do |*args|
+          yield(ActiveSupport::Notifications::Event.new(*args)) if block_given?
+        end
+      end
+
+      # :nodoc:
+      def unsubscribe(subscriber)
+        ActiveSupport::Notifications.unsubscribe(subscriber)
+      end
+
+      # :nodoc:
+      def instrument(event, extra={})
+        ActiveSupport::Notifications.instrument("#{NAMESPACE}.#{event}", extra) do |extra2|
+          yield(extra2) if block_given?
+        end
+      end
+    end
+  end
+
+  include Instrumentation
+
+  # This module listens to events published by RubyKafka.
+  module KafkaListener
+    # Listens for any exceptions that happen during publishing and re-publishes
+    # as a Deimos event.
+    # @param event [ActiveSupport::Notification]
+    def self.send_produce_error(event)
+      exception = event.payload[:exception_object]
+      return if !exception || !exception.respond_to?(:failed_messages)
+
+      messages = exception.failed_messages
+      messages.group_by(&:topic).each do |topic, batch|
+        next if batch.empty?
+
+        producer = batch.first.metadata[:producer_name]
+        payloads = batch.map { |m| m.metadata[:decoded_payload] }
+
+        Deimos.config.metrics&.count('publish_error', payloads.size,
+                                     tags: %W(topic:#{topic}))
+        Deimos.instrument(
+          'produce_error',
+          producer: producer,
+          topic: topic,
+          exception_object: exception,
+          payloads: payloads
+        )
+      end
+    end
+  end
+
+  ActiveSupport::Notifications.subscribe('deliver_messages.producer.kafka') do |*args|
+    event = ActiveSupport::Notifications::Event.new(*args)
+    KafkaListener.send_produce_error(event)
+  end
+end
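Since `Deimos` itself includes this concern, applications can subscribe to the same notifications. For example, hooking the `produce_error` event that `KafkaListener` re-publishes when an async delivery fails:

```ruby
Deimos.subscribe('produce_error') do |event|
  # event.payload carries the keys passed to Deimos.instrument above.
  Deimos.config.logger.error(
    message: "Failed to produce #{event.payload[:payloads].size} messages to #{event.payload[:topic]}"
  )
end
```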
data/lib/deimos/kafka_message.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Store Kafka messages into the database.
+  class KafkaMessage < ActiveRecord::Base
+    self.table_name = 'kafka_messages'
+
+    validates_presence_of :message, :topic
+
+    # Ensure it gets turned into a string, e.g. for testing purposes. It
+    # should already be a string.
+    # @param mess [Object]
+    def message=(mess)
+      write_attribute(:message, mess.to_s)
+    end
+
+    # @return [Hash]
+    def phobos_message
+      {
+        payload: self.message,
+        partition_key: self.partition_key,
+        key: self.key,
+        topic: self.topic
+      }
+    end
+  end
+end
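With the `:db` backend each unsent message becomes one of these rows, and `phobos_message` turns a row back into the hash a Phobos producer expects. A small sketch of that round trip (columns as implied by the model above):

```ruby
record = Deimos::KafkaMessage.create!(
  topic: 'my-topic',
  key: '123',
  partition_key: '123',
  message: '{"test_id":"abc"}' # stored as a string; message= calls to_s regardless
)
record.phobos_message
# => { payload: '{"test_id":"abc"}', partition_key: '123', key: '123', topic: 'my-topic' }
```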
data/lib/deimos/kafka_source.rb
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Represents an object which needs to inform Kafka when it is saved or
+  # bulk imported.
+  module KafkaSource
+    extend ActiveSupport::Concern
+
+    included do
+      after_create(:send_kafka_event_on_create)
+      after_update(:send_kafka_event_on_update)
+      after_destroy(:send_kafka_event_on_destroy)
+    end
+
+    # Send the newly created model to Kafka.
+    def send_kafka_event_on_create
+      return unless self.persisted?
+      return unless self.class.kafka_config[:create]
+
+      self.class.kafka_producers.each { |p| p.send_event(self) }
+    end
+
+    # Send the newly updated model to Kafka.
+    def send_kafka_event_on_update
+      return unless self.class.kafka_config[:update]
+
+      producers = self.class.kafka_producers
+      fields = producers.flat_map(&:watched_attributes).uniq
+      fields -= ['updated_at']
+      # Only send an event if a field we care about was changed.
+      any_changes = fields.any? do |field|
+        field_change = self.previous_changes[field]
+        field_change.present? && field_change[0] != field_change[1]
+      end
+      return unless any_changes
+
+      producers.each { |p| p.send_event(self) }
+    end
+
+    # Send a deletion (null payload) event to Kafka.
+    def send_kafka_event_on_destroy
+      return unless self.class.kafka_config[:delete]
+
+      self.class.kafka_producers.each { |p| p.send_event(self.deletion_payload) }
+    end
+
+    # Payload to send after we are destroyed.
+    # @return [Hash]
+    def deletion_payload
+      { payload_key: self[self.class.primary_key] }
+    end
+
+    # :nodoc:
+    module ClassMethods
+      # @return [Hash]
+      def kafka_config
+        {
+          update: true,
+          delete: true,
+          import: true,
+          create: true
+        }
+      end
+
+      # @return [Array<Deimos::ActiveRecordProducer>] the producers to run.
+      def kafka_producers
+        raise NotImplementedError if self.method(:kafka_producer).
+          owner == Deimos::KafkaSource
+
+        [self.kafka_producer]
+      end
+
+      # Deprecated - use #kafka_producers instead.
+      # @return [Deimos::ActiveRecordProducer] the producer to use.
+      def kafka_producer
+        raise NotImplementedError if self.method(:kafka_producers).
+          owner == Deimos::KafkaSource
+
+        self.kafka_producers.first
+      end
+
+      # This is an internal method, part of the activerecord_import gem. It's
+      # the one that actually does the importing, having already normalized
+      # the inputs (arrays, hashes, records etc.)
+      # Basically we want to first do the import, then reload the records
+      # and send them to Kafka.
+      def import_without_validations_or_callbacks(column_names,
+                                                  array_of_attributes,
+                                                  options={})
+        results = super
+        return unless self.kafka_config[:import]
+        return if array_of_attributes.empty?
+
+        # This will contain an array of hashes, where each hash is the actual
+        # attribute hash that created the object.
+        ids =
+        ids = if results.is_a?(Array)
+                results[1]
+              elsif results.respond_to?(:ids)
+                results.ids
+              else
+                []
+              end
+        if ids.blank?
+          # re-fill IDs based on what was just entered into the DB.
+          if self.connection.adapter_name.downcase =~ /sqlite/
+            last_id = self.connection.select_value('select last_insert_rowid()')
+            ids = ((last_id - array_of_attributes.size + 1)..last_id).to_a
+          else # mysql
+            last_id = self.connection.select_value('select LAST_INSERT_ID()')
+            ids = (last_id..(last_id + array_of_attributes.size)).to_a
+          end
+        end
+        array_of_hashes = []
+        array_of_attributes.each_with_index do |array, i|
+          hash = column_names.zip(array).to_h.with_indifferent_access
+          hash[self.primary_key] = ids[i] if hash[self.primary_key].blank?
+          array_of_hashes << hash
+        end
+
+        self.kafka_producers.each { |p| p.send_events(array_of_hashes) }
+        results
+      end
+    end
+  end
+end
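A model opts in by including the concern and pointing it at its producers; `kafka_config` can be overridden to switch individual events off. A sketch, assuming a `WidgetProducer` class (an `ActiveRecordProducer` subclass not shown in this hunk):

```ruby
class Widget < ActiveRecord::Base
  include Deimos::KafkaSource

  # Producers fed by the create/update/destroy callbacks and by bulk imports.
  def self.kafka_producers
    [WidgetProducer] # hypothetical producer class
  end

  # Optional: keep create/update/import events but skip deletion events.
  def self.kafka_config
    super.merge(delete: false)
  end
end
```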
data/lib/deimos/kafka_topic_info.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Record that keeps track of which topics are being worked on by DbProducers.
+  class KafkaTopicInfo < ActiveRecord::Base
+    self.table_name = 'kafka_topic_info'
+
+    class << self
+      # Lock a topic for the given ID. Returns whether the lock was successful.
+      # @param topic [String]
+      # @param lock_id [String]
+      # @return [Boolean]
+      def lock(topic, lock_id)
+        # Try to create it - it's fine if it already exists
+        begin
+          self.create(topic: topic)
+        rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/HandleExceptions
+          # continue on
+        end
+
+        # Lock the record
+        qtopic = self.connection.quote(topic)
+        qlock_id = self.connection.quote(lock_id)
+        qtable = self.connection.quote_table_name('kafka_topic_info')
+        qnow = self.connection.quote(Time.zone.now.to_s(:db))
+        qfalse = self.connection.quoted_false
+        qtime = self.connection.quote(1.minute.ago.to_s(:db))
+
+        # If a record is marked as error and less than 1 minute old,
+        # we don't want to pick it up even if not currently locked because
+        # we worry we'll run into the same problem again.
+        # Once it's more than 1 minute old, we figure it's OK to try again
+        # so we can pick up any topic that's that old, even if it was
+        # locked by someone, because it's the job of the producer to keep
+        # updating the locked_at timestamp as they work on messages in that
+        # topic. If the locked_at timestamp is that old, chances are that
+        # the producer crashed.
+        sql = <<~SQL
+          UPDATE #{qtable}
+          SET locked_by=#{qlock_id}, locked_at=#{qnow}, error=#{qfalse}
+          WHERE topic=#{qtopic} AND
+           ((locked_by IS NULL AND error=#{qfalse}) OR locked_at < #{qtime})
+        SQL
+        self.connection.update(sql)
+        self.where(locked_by: lock_id, topic: topic).any?
+      end
+
+      # This is called once a producer is finished working on a topic, i.e.
+      # there are no more messages to fetch. It unlocks the topic and
+      # moves on to the next one.
+      # @param topic [String]
+      # @param lock_id [String]
+      def clear_lock(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_by: nil, locked_at: nil, error: false, retries: 0)
+      end
+
+      # The producer calls this if it gets an error sending messages. This
+      # essentially locks down this topic for 1 minute (for all producers)
+      # and allows the caller to continue to the next topic.
+      # @param topic [String]
+      # @param lock_id [String]
+      def register_error(topic, lock_id)
+        record = self.where(topic: topic, locked_by: lock_id).last
+        attr_hash = { locked_by: nil,
+                      locked_at: Time.zone.now,
+                      error: true,
+                      retries: record.retries + 1 }
+        if Rails::VERSION::MAJOR >= 4
+          record.update!(attr_hash)
+        else
+          record.update_attributes!(attr_hash)
+        end
+      end
+
+      # Update the locked_at timestamp to indicate that the producer is still
+      # working on those messages and to continue.
+      # @param topic [String]
+      # @param lock_id [String]
+      def heartbeat(topic, lock_id)
+        self.where(topic: topic, locked_by: lock_id).
+          update_all(locked_at: Time.zone.now)
+      end
+    end
+  end
+end
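Together these methods give each DB producer a per-topic lease: take the lock, heartbeat while draining messages, then clear it, or record an error to back the topic off for about a minute. A condensed sketch of that lifecycle, assuming a worker identified by a random `lock_id`:

```ruby
lock_id = SecureRandom.uuid

if Deimos::KafkaTopicInfo.lock('my-topic', lock_id)
  begin
    # ... publish pending Deimos::KafkaMessage rows for this topic ...
    Deimos::KafkaTopicInfo.heartbeat('my-topic', lock_id)  # still working; keep the lease fresh
    Deimos::KafkaTopicInfo.clear_lock('my-topic', lock_id) # nothing left to send
  rescue StandardError
    Deimos::KafkaTopicInfo.register_error('my-topic', lock_id) # other workers skip it for ~1 minute
  end
end
```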
data/lib/deimos/message.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Basically a struct to hold the message as it's processed.
+  class Message
+    attr_accessor :payload, :key, :partition_key, :encoded_key,
+                  :encoded_payload, :topic, :producer_name
+
+    # @param payload [Hash]
+    # @param producer [Class]
+    def initialize(payload, producer, topic: nil, key: nil, partition_key: nil)
+      @payload = payload&.with_indifferent_access
+      @producer_name = producer.name
+      @topic = topic
+      @key = key
+      @partition_key = partition_key
+    end
+
+    # Add message_id and timestamp default values if they are in the
+    # schema and don't already have values.
+    # @param schema [Avro::Schema]
+    def add_fields(schema)
+      return if @payload.except(:payload_key, :partition_key).blank?
+
+      if schema.fields.any? { |f| f.name == 'message_id' }
+        @payload['message_id'] ||= SecureRandom.uuid
+      end
+      if schema.fields.any? { |f| f.name == 'timestamp' }
+        @payload['timestamp'] ||= Time.now.in_time_zone.to_s
+      end
+    end
+
+    # @param schema [Avro::Schema]
+    def coerce_fields(schema)
+      return if payload.nil?
+
+      @payload = SchemaCoercer.new(schema).coerce(@payload)
+    end
+
+    # @return [Hash]
+    def encoded_hash
+      {
+        topic: @topic,
+        key: @encoded_key,
+        partition_key: @partition_key || @encoded_key,
+        payload: @encoded_payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @return [Hash]
+    def to_h
+      {
+        topic: @topic,
+        key: @key,
+        partition_key: @partition_key || @key,
+        payload: @payload,
+        metadata: {
+          decoded_payload: @payload,
+          producer_name: @producer_name
+        }
+      }
+    end
+
+    # @param other [Message]
+    # @return [Boolean]
+    def ==(other)
+      self.to_h == other.to_h
+    end
+  end
+end
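The struct can be exercised on its own; `to_h` is also what `==` compares, while `encoded_hash` is what the Kafka backends above actually publish. A sketch with a hypothetical producer class:

```ruby
message = Deimos::Message.new(
  { 'test_id' => 'abc' },  # payload
  MyProducer,              # hypothetical producer class; only its .name is stored
  topic: 'my-topic',
  key: 'abc'
)
message.to_h[:metadata][:producer_name] # => "MyProducer"
```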