deimos-kafka 1.0.0.pre.beta15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +9 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +742 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos.rb +134 -0
  24. data/lib/deimos/active_record_consumer.rb +81 -0
  25. data/lib/deimos/active_record_producer.rb +64 -0
  26. data/lib/deimos/avro_data_coder.rb +89 -0
  27. data/lib/deimos/avro_data_decoder.rb +36 -0
  28. data/lib/deimos/avro_data_encoder.rb +51 -0
  29. data/lib/deimos/backends/db.rb +27 -0
  30. data/lib/deimos/backends/kafka.rb +27 -0
  31. data/lib/deimos/backends/kafka_async.rb +27 -0
  32. data/lib/deimos/configuration.rb +88 -0
  33. data/lib/deimos/consumer.rb +164 -0
  34. data/lib/deimos/instrumentation.rb +71 -0
  35. data/lib/deimos/kafka_message.rb +27 -0
  36. data/lib/deimos/kafka_source.rb +126 -0
  37. data/lib/deimos/kafka_topic_info.rb +79 -0
  38. data/lib/deimos/message.rb +74 -0
  39. data/lib/deimos/metrics/datadog.rb +47 -0
  40. data/lib/deimos/metrics/mock.rb +39 -0
  41. data/lib/deimos/metrics/provider.rb +38 -0
  42. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  43. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  44. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  45. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  46. data/lib/deimos/producer.rb +218 -0
  47. data/lib/deimos/publish_backend.rb +30 -0
  48. data/lib/deimos/railtie.rb +8 -0
  49. data/lib/deimos/schema_coercer.rb +108 -0
  50. data/lib/deimos/shared_config.rb +59 -0
  51. data/lib/deimos/test_helpers.rb +356 -0
  52. data/lib/deimos/tracing/datadog.rb +35 -0
  53. data/lib/deimos/tracing/mock.rb +40 -0
  54. data/lib/deimos/tracing/provider.rb +31 -0
  55. data/lib/deimos/utils/db_producer.rb +95 -0
  56. data/lib/deimos/utils/executor.rb +117 -0
  57. data/lib/deimos/utils/inline_consumer.rb +144 -0
  58. data/lib/deimos/utils/lag_reporter.rb +182 -0
  59. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  60. data/lib/deimos/utils/signal_handler.rb +68 -0
  61. data/lib/deimos/version.rb +5 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +17 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +117 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +208 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
# frozen_string_literal: true

require 'mutex_m'

# :nodoc:
module Deimos
  module Utils
    # Class that manages reporting lag. Lag is recorded per consumer
    # group / topic / partition as messages are processed, and published
    # as a gauge on every consumer heartbeat.
    class LagReporter
      extend Mutex_m

      # Class that has a list of topics
      class ConsumerGroup
        # @return [Hash<String, Topic>]
        attr_accessor :topics
        # @return [String]
        attr_accessor :id

        # @param id [String]
        def initialize(id)
          self.id = id
          self.topics = {}
        end

        # @param topic [String]
        # @param partition [Integer]
        def report_lag(topic, partition)
          self.topics[topic.to_s] ||= Topic.new(topic, self)
          self.topics[topic.to_s].report_lag(partition)
        end

        # @param topic [String]
        # @param partition [Integer]
        # @param lag [Integer]
        def assign_lag(topic, partition, lag)
          self.topics[topic.to_s] ||= Topic.new(topic, self)
          self.topics[topic.to_s].assign_lag(partition, lag)
        end

        # Figure out the current lag by asking Kafka based on the current offset.
        # @param topic [String]
        # @param partition [Integer]
        # @param offset [Integer]
        def compute_lag(topic, partition, offset)
          self.topics[topic.to_s] ||= Topic.new(topic, self)
          self.topics[topic.to_s].compute_lag(partition, offset)
        end
      end

      # Topic which has a hash of partition => last known offset lag
      class Topic
        # @return [String]
        attr_accessor :topic_name
        # @return [Hash<Integer, Integer>]
        attr_accessor :partition_offset_lags
        # @return [ConsumerGroup]
        attr_accessor :consumer_group

        # @param topic_name [String]
        # @param group [ConsumerGroup]
        def initialize(topic_name, group)
          self.topic_name = topic_name
          self.consumer_group = group
          self.partition_offset_lags = {}
        end

        # @param partition [Integer]
        # @param lag [Integer]
        def assign_lag(partition, lag)
          self.partition_offset_lags[partition.to_i] = lag
        end

        # Ask Kafka for the last offset and record the lag relative to the
        # given offset. Only runs if no lag is known for the partition yet.
        # @param partition [Integer]
        # @param offset [Integer]
        def compute_lag(partition, offset)
          return if self.partition_offset_lags[partition.to_i]

          begin
            client = Phobos.create_kafka_client
            last_offset = client.last_offset_for(self.topic_name, partition)
            assign_lag(partition, [last_offset - offset, 0].max)
          rescue StandardError # don't do anything, just wait
            Deimos.config.logger.
              debug("Error computing lag for #{self.topic_name}, will retry")
          ensure
            # Fix: close the client we just created so each lookup doesn't
            # leak broker connections; swallow errors raised during close so
            # this method stays non-raising like before.
            begin
              client&.close
            rescue StandardError
              nil
            end
          end
        end

        # Log and emit the last known lag for a partition as a metric gauge.
        # @param partition [Integer]
        def report_lag(partition)
          lag = self.partition_offset_lags[partition.to_i]
          return unless lag

          group = self.consumer_group.id
          Deimos.config.logger.
            debug("Sending lag: #{group}/#{partition}: #{lag}")
          Deimos.config.metrics&.gauge('consumer_lag', lag, tags: %W(
            consumer_group:#{group}
            partition:#{partition}
            topic:#{self.topic_name}
          ))
        end
      end

      # group_id => ConsumerGroup, guarded by the Mutex_m synchronize below.
      @groups = {}

      class << self
        # Reset all group information.
        def reset
          @groups = {}
        end

        # Record the offset lag Phobos reported for a just-processed message.
        # @param payload [Hash]
        def message_processed(payload)
          lag = payload[:offset_lag]
          topic = payload[:topic]
          group = payload[:group_id]
          partition = payload[:partition]

          synchronize do
            @groups[group.to_s] ||= ConsumerGroup.new(group)
            @groups[group.to_s].assign_lag(topic, partition, lag)
          end
        end

        # On a seek we don't know the lag, so compute it by asking Kafka.
        # @param payload [Hash]
        def offset_seek(payload)
          offset = payload[:offset]
          topic = payload[:topic]
          group = payload[:group_id]
          partition = payload[:partition]

          synchronize do
            @groups[group.to_s] ||= ConsumerGroup.new(group)
            @groups[group.to_s].compute_lag(topic, partition, offset)
          end
        end

        # Publish the last known lag for every partition the group consumes.
        # @param payload [Hash]
        def heartbeat(payload)
          group = payload[:group_id]
          synchronize do
            @groups[group.to_s] ||= ConsumerGroup.new(group)
            consumer_group = @groups[group.to_s]
            payload[:topic_partitions].each do |topic, partitions|
              partitions.each do |partition|
                consumer_group.report_lag(topic, partition)
              end
            end
          end
        end
      end
    end
  end

  ActiveSupport::Notifications.subscribe('start_process_message.consumer.kafka') do |*args|
    next unless Deimos.config.report_lag

    event = ActiveSupport::Notifications::Event.new(*args)
    Deimos::Utils::LagReporter.message_processed(event.payload)
  end

  ActiveSupport::Notifications.subscribe('start_process_batch.consumer.kafka') do |*args|
    next unless Deimos.config.report_lag

    event = ActiveSupport::Notifications::Event.new(*args)
    Deimos::Utils::LagReporter.message_processed(event.payload)
  end

  ActiveSupport::Notifications.subscribe('seek.consumer.kafka') do |*args|
    next unless Deimos.config.report_lag

    event = ActiveSupport::Notifications::Event.new(*args)
    Deimos::Utils::LagReporter.offset_seek(event.payload)
  end

  ActiveSupport::Notifications.subscribe('heartbeat.consumer.kafka') do |*args|
    next unless Deimos.config.report_lag

    event = ActiveSupport::Notifications::Event.new(*args)
    Deimos::Utils::LagReporter.heartbeat(event.payload)
  end
end
# frozen_string_literal: true

module Deimos
  module Utils
    # Mostly copied free-form from Phobos::Cli::Runner. We should add a PR to
    # basically replace that implementation with this one to make it more generic.
    #
    # Runs an object with a start/stop lifecycle, traps INT/TERM/QUIT via the
    # self-pipe trick, and stops the runner cleanly when a signal arrives.
    class SignalHandler
      SIGNALS = %i(INT TERM QUIT).freeze

      # Takes any object that responds to the `start` and `stop` methods.
      # @param runner[#start, #stop]
      def initialize(runner)
        @signal_queue = []
        @reader, @writer = IO.pipe
        @runner = runner
      end

      # Start the runner, then block until one of SIGNALS arrives; stop the
      # runner and return once it does.
      def run!
        setup_signals
        @runner.start

        loop do
          # Array#pop is non-blocking and returns nil when the queue is empty.
          case signal_queue.pop
          when *SIGNALS
            @runner.stop
            break
          else
            # Fix: only the reader end of the self-pipe can ever become
            # readable, so select on it alone (the write end was previously
            # passed in the read set, where it could never fire).
            ready = IO.select([reader])

            # drain the self-pipe so it won't be returned again next time
            reader.read_nonblock(1) if ready[0].include?(reader)
          end
        end
      end

      private

      # Removed the unused :executor reader — nothing in this class sets or
      # reads @executor.
      attr_reader :reader, :writer, :signal_queue

      # Install our handler for +signal+ while preserving any previously
      # installed handler, which is invoked after ours.
      # https://stackoverflow.com/questions/29568298/run-code-when-signal-is-sent-but-do-not-trap-the-signal-in-ruby
      def prepend_handler(signal)
        previous = Signal.trap(signal) do
          previous = -> { raise SignalException, signal } unless previous.respond_to?(:call)
          yield
          previous.call
        end
      end

      # Trap signals using the self-pipe trick.
      def setup_signals
        at_exit { @runner&.stop }
        SIGNALS.each do |signal|
          prepend_handler(signal) do
            unblock(signal)
          end
        end
      end

      # Save the signal to the queue and wake the select loop via the pipe.
      # @param signal [Symbol]
      def unblock(signal)
        writer.write_nonblock('.')
        signal_queue << signal
      end
    end
  end
end
# frozen_string_literal: true

module Deimos
  # Gem version. RubyGems treats the '-beta15' suffix as a prerelease
  # segment, so this is published as 1.0.0.pre.beta15.
  VERSION = '1.0.0-beta15'
end
class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
  def change
    # Outgoing messages queued for the DB publish backend to send to Kafka.
    create_table :kafka_messages, force: true do |t|
      t.string :topic, null: false
      t.binary :message, limit: 10.megabytes
      t.binary :key
      t.string :partition_key
      t.timestamps
    end

    # Supports ordered, per-topic scans when draining queued messages.
    add_index :kafka_messages, [:topic, :id]

    # Per-topic lock/error bookkeeping so multiple producer processes can
    # coordinate which topic each one is draining.
    create_table :kafka_topic_info, force: true do |t|
      t.string :topic, null: false
      t.string :locked_by
      t.datetime :locked_at
      t.boolean :error, null: false, default: false
      t.integer :retries, null: false, default: 0
    end
    add_index :kafka_topic_info, :topic, unique: true
    add_index :kafka_topic_info, [:locked_by, :error]
    add_index :kafka_topic_info, :locked_at
  end
end
class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
  # Rails 3 migrations lack the reversible `change` method, so this template
  # defines explicit up/down instead (see the sibling `migration` template).
  def self.up
    # Outgoing messages queued for the DB publish backend to send to Kafka.
    create_table :kafka_messages, force: true do |t|
      t.string :topic, null: false
      t.binary :message, limit: 10.megabytes
      t.binary :key
      t.string :partition_key
      t.timestamps
    end

    # Supports ordered, per-topic scans when draining queued messages.
    add_index :kafka_messages, [:topic, :id]

    # Per-topic lock/error bookkeeping so multiple producer processes can
    # coordinate which topic each one is draining.
    create_table :kafka_topic_info, force: true do |t|
      t.string :topic, null: false
      t.string :locked_by
      t.datetime :locked_at
      t.boolean :error, null: false, default: false
      t.integer :retries, null: false, default: 0
    end
    add_index :kafka_topic_info, :topic, unique: true
    add_index :kafka_topic_info, [:locked_by, :error]
    add_index :kafka_topic_info, :locked_at
  end

  def self.down
    drop_table :kafka_messages
    drop_table :kafka_topic_info
  end

end
# frozen_string_literal: true

require 'rails/generators'
require 'rails/generators/active_record/migration'

module Deimos
  module Generators
    # Generate the database backend migration.
    class DbBackendGenerator < Rails::Generators::Base
      include Rails::Generators::Migration
      # Fix: compare versions numerically — a plain string comparison
      # ('10.0' < '4' is true lexicographically) would route double-digit
      # Rails majors to the Rails 3 branch.
      if Gem::Version.new(Rails.version) < Gem::Version.new('4')
        extend(ActiveRecord::Generators::Migration)
      else
        include ActiveRecord::Generators::Migration
      end
      source_root File.expand_path('db_backend/templates', __dir__)
      desc 'Add migrations for the database backend'

      # @return [String] the migration version suffix, e.g. "[5.2]", or ''
      #   on Rails versions without versioned migrations.
      def migration_version
        "[#{ActiveRecord::Migration.current_version}]"
      rescue StandardError
        ''
      end

      # @return [String] the app's first configured migration path, or the
      #   default 'db/migrate' when no Rails application is loaded.
      def db_migrate_path
        if defined?(Rails.application) && Rails.application
          paths = Rails.application.config.paths['db/migrate']
          paths.respond_to?(:to_ary) ? paths.to_ary.first : paths.to_a.first
        else
          'db/migrate'
        end
      end

      # Main method to create all the necessary files
      def generate
        template = if Gem::Version.new(Rails.version) < Gem::Version.new('4')
                     'rails3_migration'
                   else
                     'migration'
                   end
        migration_template(template, "#{db_migrate_path}/create_db_backend.rb")
      end
    end
  end
end
# frozen_string_literal: true

require 'phobos'
require 'phobos/cli'

namespace :deimos do
  desc 'Starts Deimos in the rails environment'
  task start: :environment do
    Deimos.configure do |config|
      # Force synchronous publishing inside the rake task.
      # NOTE(review): the backends shipped in this gem are :db, :kafka and
      # :kafka_async — confirm :kafka_sync is a recognized value and not a
      # typo for :kafka.
      config.publish_backend = :kafka_sync if config.publish_backend == :kafka_async
    end
    # Marks this process as launched via the rake task; presumably read
    # elsewhere in the gem — TODO confirm the consumer of this env var.
    ENV['DEIMOS_RAKE_TASK'] = 'true'
    # Unbuffered stdout so log lines appear immediately under supervisors.
    STDOUT.sync = true
    Rails.logger.info('Running deimos:start rake task.')
    # Delegate to Phobos' CLI; --skip_config because Deimos configured Phobos.
    Phobos::CLI::Commands.start(%w(start --skip_config))
  end
end
# frozen_string_literal: true

require 'date'

# :nodoc:
module ActiveRecordProducerTest
  describe Deimos::ActiveRecordConsumer do

    before(:all) do
      # Throwaway table whose columns mirror the MySchemaWithDateTimes fields.
      ActiveRecord::Base.connection.create_table(:widgets, force: true) do |t|
        t.string(:test_id)
        t.integer(:some_int)
        t.boolean(:some_bool)
        t.datetime(:some_datetime_int)
        t.timestamps
      end

      # :nodoc:
      class Widget < ActiveRecord::Base
      end
      Widget.reset_column_information
    end

    after(:all) do
      ActiveRecord::Base.connection.drop_table(:widgets)
    end

    prepend_before(:each) do

      # Anonymous consumer subclass so each example sees a fresh definition,
      # surfaced under a stable constant name via stub_const.
      consumer_class = Class.new(Deimos::ActiveRecordConsumer) do
        schema 'MySchemaWithDateTimes'
        namespace 'com.my-namespace'
        key_config plain: true
        record_class Widget
      end
      stub_const('MyConsumer', consumer_class)

      # Pin the zone so epoch-integer -> datetime coercion is deterministic.
      Time.zone = 'Eastern Time (US & Canada)'
    end

    it 'should receive events correctly' do
      travel 1.day do
        expect(Widget.count).to eq(0)
        # Create: the message key (5) is used as the record's primary key;
        # datetime fields arrive as epoch ints / strings to exercise coercion.
        test_consume_message(MyConsumer, {
          test_id: 'abc',
          some_int: 3,
          updated_at: 1.day.ago.to_i,
          some_datetime_int: Time.zone.now.to_i,
          timestamp: 2.minutes.ago.to_s
        }, { call_original: true, key: 5 })

        expect(Widget.count).to eq(1)
        widget = Widget.last
        expect(widget.id).to eq(5)
        expect(widget.test_id).to eq('abc')
        expect(widget.some_int).to eq(3)
        expect(widget.some_datetime_int).to eq(Time.zone.now)
        # NOTE(review): payload carried updated_at = 1.day.ago but `now` is
        # expected — the consumer apparently overwrites Rails timestamps on
        # save; confirm against ActiveRecordConsumer.
        expect(widget.updated_at).to eq(Time.zone.now)

        # test update
        test_consume_message(MyConsumer, {
          test_id: 'abcd',
          some_int: 3,
          some_datetime_int: Time.zone.now.to_i,
          timestamp: 2.minutes.ago.to_s
        }, { call_original: true, key: 5 })
        widget = Widget.last
        expect(widget.id).to eq(5)
        expect(widget.test_id).to eq('abcd')
        expect(widget.some_int).to eq(3)

        # test delete
        # A nil payload (tombstone) removes the record with the matching key.
        test_consume_message(MyConsumer, nil, call_original: true, key: 5)
        expect(Widget.count).to eq(0)

      end

    end

  end
end