deimos-ruby 1.6.1 → 1.8.0.pre.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +9 -0
  3. data/.rubocop.yml +15 -13
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +30 -0
  6. data/Gemfile.lock +87 -80
  7. data/README.md +139 -15
  8. data/Rakefile +1 -1
  9. data/deimos-ruby.gemspec +3 -2
  10. data/docs/ARCHITECTURE.md +144 -0
  11. data/docs/CONFIGURATION.md +27 -0
  12. data/lib/deimos.rb +7 -6
  13. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  14. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  15. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  16. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  17. data/lib/deimos/active_record_consumer.rb +33 -75
  18. data/lib/deimos/active_record_producer.rb +23 -0
  19. data/lib/deimos/batch_consumer.rb +2 -140
  20. data/lib/deimos/config/configuration.rb +28 -10
  21. data/lib/deimos/consume/batch_consumption.rb +148 -0
  22. data/lib/deimos/consume/message_consumption.rb +93 -0
  23. data/lib/deimos/consumer.rb +79 -69
  24. data/lib/deimos/kafka_message.rb +1 -1
  25. data/lib/deimos/kafka_source.rb +29 -23
  26. data/lib/deimos/kafka_topic_info.rb +1 -1
  27. data/lib/deimos/message.rb +6 -1
  28. data/lib/deimos/metrics/provider.rb +0 -2
  29. data/lib/deimos/poll_info.rb +9 -0
  30. data/lib/deimos/tracing/provider.rb +0 -2
  31. data/lib/deimos/utils/db_poller.rb +149 -0
  32. data/lib/deimos/utils/db_producer.rb +8 -3
  33. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  34. data/lib/deimos/utils/lag_reporter.rb +19 -26
  35. data/lib/deimos/version.rb +1 -1
  36. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  37. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  38. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  39. data/lib/tasks/deimos.rake +7 -0
  40. data/spec/active_record_batch_consumer_spec.rb +481 -0
  41. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  42. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  43. data/spec/active_record_consumer_spec.rb +22 -11
  44. data/spec/active_record_producer_spec.rb +66 -88
  45. data/spec/batch_consumer_spec.rb +23 -7
  46. data/spec/config/configuration_spec.rb +4 -0
  47. data/spec/consumer_spec.rb +8 -8
  48. data/spec/deimos_spec.rb +57 -49
  49. data/spec/handlers/my_batch_consumer.rb +6 -1
  50. data/spec/handlers/my_consumer.rb +6 -1
  51. data/spec/kafka_source_spec.rb +53 -0
  52. data/spec/message_spec.rb +19 -0
  53. data/spec/producer_spec.rb +3 -3
  54. data/spec/rake_spec.rb +1 -1
  55. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  56. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  57. data/spec/spec_helper.rb +61 -6
  58. data/spec/utils/db_poller_spec.rb +320 -0
  59. data/spec/utils/deadlock_retry_spec.rb +74 -0
  60. data/spec/utils/lag_reporter_spec.rb +29 -22
  61. metadata +61 -20
  62. data/lib/deimos/base_consumer.rb +0 -104
  63. data/lib/deimos/utils/executor.rb +0 -124
  64. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  65. data/lib/deimos/utils/signal_handler.rb +0 -68
  66. data/spec/utils/executor_spec.rb +0 -53
  67. data/spec/utils/signal_handler_spec.rb +0 -16
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
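+ # @param secs [Integer] seconds to add to the base time
+ # @param mins [Integer] minutes to add to the base time
4
+ # @return [Time] 2015-05-05 01:00:00 (local time) shifted by the given offset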
5
+ def time_value(secs: 0, mins: 0)
6
+ Time.local(2015, 5, 5, 1, 0, 0) + (secs + (mins * 60))
7
+ end
8
+
9
+ each_db_config(Deimos::Utils::DbPoller) do
10
+
11
+ before(:each) do
12
+ Deimos::PollInfo.delete_all
13
+ end
14
+
15
+ describe '#start!' do
16
+
17
+ before(:each) do
18
+ producer_class = Class.new(Deimos::Producer) do
19
+ schema 'MySchema'
20
+ namespace 'com.my-namespace'
21
+ topic 'my-topic'
22
+ key_config field: 'test_id'
23
+ end
24
+ stub_const('MyProducer', producer_class)
25
+
26
+ producer_class = Class.new(Deimos::Producer) do
27
+ schema 'MySchemaWithId'
28
+ namespace 'com.my-namespace'
29
+ topic 'my-topic'
30
+ key_config plain: true
31
+ end
32
+ stub_const('MyProducerWithID', producer_class)
33
+ end
34
+
35
+ it 'should raise an error if no pollers configured' do
36
+ Deimos.configure {}
37
+ expect { described_class.start! }.to raise_error('No pollers configured!')
38
+ end
39
+
40
+ it 'should start pollers as configured' do
41
+ Deimos.configure do
42
+ db_poller do
43
+ producer_class 'MyProducer'
44
+ end
45
+ db_poller do
46
+ producer_class 'MyProducerWithID'
47
+ end
48
+ end
49
+
50
+ allow(Deimos::Utils::DbPoller).to receive(:new)
51
+ signal_double = instance_double(Sigurd::SignalHandler, run!: nil)
52
+ allow(Sigurd::SignalHandler).to receive(:new).and_return(signal_double)
53
+ described_class.start!
54
+ expect(Deimos::Utils::DbPoller).to have_received(:new).twice
55
+ expect(Deimos::Utils::DbPoller).to have_received(:new).
56
+ with(Deimos.config.db_poller_objects[0])
57
+ expect(Deimos::Utils::DbPoller).to have_received(:new).
58
+ with(Deimos.config.db_poller_objects[1])
59
+ end
60
+ end
61
+
62
+ describe 'pollers' do
63
+ include_context 'with widgets'
64
+
65
+ let(:poller) do
66
+ poller = described_class.new(config)
67
+ allow(poller).to receive(:sleep)
68
+ poller
69
+ end
70
+
71
+ let(:config) { Deimos.config.db_poller_objects.first.dup }
72
+
73
+ before(:each) do
74
+ Widget.delete_all
75
+ producer_class = Class.new(Deimos::ActiveRecordProducer) do
76
+ schema 'MySchemaWithId'
77
+ namespace 'com.my-namespace'
78
+ topic 'my-topic-with-id'
79
+ key_config none: true
80
+ record_class Widget
81
+
82
+ # :nodoc:
83
+ def self.generate_payload(attrs, widget)
84
+ super.merge(message_id: widget.generated_id)
85
+ end
86
+ end
87
+ stub_const('MyProducer', producer_class)
88
+
89
+ Deimos.configure do
90
+ db_poller do
91
+ producer_class 'MyProducer'
92
+ run_every 1.minute
93
+ end
94
+ end
95
+ end
96
+
97
+ after(:each) do
98
+ travel_back
99
+ end
100
+
101
+ it 'should crash if initialized with an invalid producer' do
102
+ config.producer_class = 'NoProducer'
103
+ expect { described_class.new(config) }.to raise_error('Class NoProducer not found!')
104
+ end
105
+
106
+ describe '#retrieve_poll_info' do
107
+
108
+ it 'should start from beginning when configured' do
109
+ poller.retrieve_poll_info
110
+ expect(Deimos::PollInfo.count).to eq(1)
111
+ info = Deimos::PollInfo.last
112
+ expect(info.producer).to eq('MyProducer')
113
+ expect(info.last_sent).to eq(Time.new(0))
114
+ expect(info.last_sent_id).to eq(0)
115
+ end
116
+
117
+ it 'should start from now when configured' do
118
+ travel_to time_value
119
+ config.start_from_beginning = false
120
+ poller.retrieve_poll_info
121
+ expect(Deimos::PollInfo.count).to eq(1)
122
+ info = Deimos::PollInfo.last
123
+ expect(info.producer).to eq('MyProducer')
124
+ expect(info.last_sent).to eq(time_value)
125
+ expect(info.last_sent_id).to eq(0)
126
+ end
127
+
128
+ end
129
+
130
+ specify '#start' do
131
+ i = 0
132
+ expect(poller).to receive(:process_updates).twice do
133
+ i += 1
134
+ poller.stop if i == 2
135
+ end
136
+ poller.start
137
+ end
138
+
139
+ specify '#should_run?' do
140
+ Deimos::PollInfo.create!(producer: 'MyProducer',
141
+ last_sent: time_value)
142
+ poller.retrieve_poll_info
143
+
144
+ # run_every is set to 1 minute
145
+ travel_to time_value(secs: 62)
146
+ expect(poller.should_run?).to eq(true)
147
+
148
+ travel_to time_value(secs: 30)
149
+ expect(poller.should_run?).to eq(false)
150
+
151
+ travel_to time_value(mins: -1) # this shouldn't be possible but meh
152
+ expect(poller.should_run?).to eq(false)
153
+
154
+ # take the 2 seconds of delay_time into account
155
+ travel_to time_value(secs: 60)
156
+ expect(poller.should_run?).to eq(false)
157
+ end
158
+
159
+ specify '#process_batch' do
160
+ travel_to time_value
161
+ widgets = (1..3).map { Widget.create!(test_id: 'some_id', some_int: 4) }
162
+ widgets.last.update_attribute(:updated_at, time_value(mins: -30))
163
+ expect(MyProducer).to receive(:send_events).with(widgets)
164
+ poller.retrieve_poll_info
165
+ poller.process_batch(widgets)
166
+ info = Deimos::PollInfo.last
167
+ expect(info.last_sent.in_time_zone).to eq(time_value(mins: -30))
168
+ expect(info.last_sent_id).to eq(widgets.last.id)
169
+ end
170
+
171
+ describe '#process_updates' do
172
+ before(:each) do
173
+ Deimos::PollInfo.create!(producer: 'MyProducer',
174
+ last_sent: time_value(mins: -61),
175
+ last_sent_id: 0)
176
+ poller.retrieve_poll_info
177
+ travel_to time_value
178
+ stub_const('Deimos::Utils::DbPoller::BATCH_SIZE', 3)
179
+ end
180
+
181
+ let!(:old_widget) do
182
+ # old widget, earlier than window
183
+ Widget.create!(test_id: 'some_id', some_int: 40,
184
+ updated_at: time_value(mins: -200))
185
+ end
186
+
187
+ let!(:last_widget) do
188
+ # new widget, updated 1 second before the current time (inside the delay window)
189
+ Widget.create!(test_id: 'some_id', some_int: 10,
190
+ updated_at: time_value(secs: -1))
191
+ end
192
+
193
+ let!(:widgets) do
194
+ (1..7).map do |i|
195
+ Widget.create!(test_id: 'some_id', some_int: i,
196
+ updated_at: time_value(mins: -61, secs: 30 + i))
197
+ end
198
+ end
199
+
200
+ it 'should update the full table' do
201
+ info = Deimos::PollInfo.last
202
+ config.full_table = true
203
+ expect(MyProducer).to receive(:poll_query).at_least(:once).and_call_original
204
+ expect(poller).to receive(:process_batch).ordered.
205
+ with([old_widget, widgets[0], widgets[1]]).and_wrap_original do |m, *args|
206
+ m.call(*args)
207
+ expect(info.reload.last_sent.in_time_zone).to eq(time_value(mins: -61, secs: 32))
208
+ expect(info.last_sent_id).to eq(widgets[1].id)
209
+ end
210
+ expect(poller).to receive(:process_batch).ordered.
211
+ with([widgets[2], widgets[3], widgets[4]]).and_call_original
212
+ expect(poller).to receive(:process_batch).ordered.
213
+ with([widgets[5], widgets[6]]).and_call_original
214
+ poller.process_updates
215
+
216
+ # this is the updated_at of widgets[6]
217
+ expect(info.reload.last_sent.in_time_zone).to eq(time_value(mins: -61, secs: 37))
218
+ expect(info.last_sent_id).to eq(widgets[6].id)
219
+
220
+ last_widget.update_attribute(:updated_at, time_value(mins: -250))
221
+
222
+ travel 61.seconds
223
+ # should reprocess the table
224
+ expect(poller).to receive(:process_batch).ordered.
225
+ with([last_widget, old_widget, widgets[0]]).and_call_original
226
+ expect(poller).to receive(:process_batch).ordered.
227
+ with([widgets[1], widgets[2], widgets[3]]).and_call_original
228
+ expect(poller).to receive(:process_batch).ordered.
229
+ with([widgets[4], widgets[5], widgets[6]]).and_call_original
230
+ poller.process_updates
231
+
232
+ expect(info.reload.last_sent.in_time_zone).to eq(time_value(mins: -61, secs: 37))
233
+ expect(info.last_sent_id).to eq(widgets[6].id)
234
+ end
235
+
236
+ it 'should send events across multiple batches' do
237
+ allow(MyProducer).to receive(:poll_query).and_call_original
238
+ expect(poller).to receive(:process_batch).ordered.
239
+ with([widgets[0], widgets[1], widgets[2]]).and_call_original
240
+ expect(poller).to receive(:process_batch).ordered.
241
+ with([widgets[3], widgets[4], widgets[5]]).and_call_original
242
+ expect(poller).to receive(:process_batch).ordered.
243
+ with([widgets[6]]).and_call_original
244
+ poller.process_updates
245
+
246
+ expect(MyProducer).to have_received(:poll_query).
247
+ with(time_from: time_value(mins: -61),
248
+ time_to: time_value(secs: -2),
249
+ column_name: :updated_at,
250
+ min_id: 0)
251
+
252
+ travel 61.seconds
253
+ # process the last widget which came in during the delay
254
+ expect(poller).to receive(:process_batch).with([last_widget]).
255
+ and_call_original
256
+ poller.process_updates
257
+
258
+ # widgets[6] updated_at value
259
+ expect(MyProducer).to have_received(:poll_query).
260
+ with(time_from: time_value(mins: -61, secs: 37),
261
+ time_to: time_value(secs: 59), # plus 61 seconds minus 2 seconds for delay
262
+ column_name: :updated_at,
263
+ min_id: widgets[6].id)
264
+
265
+ travel 61.seconds
266
+ # nothing else to process
267
+ expect(poller).not_to receive(:process_batch)
268
+ poller.process_updates
269
+ poller.process_updates
270
+
271
+ expect(MyProducer).to have_received(:poll_query).twice.
272
+ with(time_from: time_value(secs: -1),
273
+ time_to: time_value(secs: 120), # plus 122 seconds minus 2 seconds
274
+ column_name: :updated_at,
275
+ min_id: last_widget.id)
276
+ end
277
+
278
+ it 'should recover correctly with errors and save the right ID' do
279
+ widgets.each do |w|
280
+ w.update_attribute(:updated_at, time_value(mins: -61, secs: 30))
281
+ end
282
+ allow(MyProducer).to receive(:poll_query).and_call_original
283
+ expect(poller).to receive(:process_batch).ordered.
284
+ with([widgets[0], widgets[1], widgets[2]]).and_call_original
285
+ expect(poller).to receive(:process_batch).ordered.
286
+ with([widgets[3], widgets[4], widgets[5]]).and_raise('OH NOES')
287
+
288
+ expect { poller.process_updates }.to raise_exception('OH NOES')
289
+
290
+ expect(MyProducer).to have_received(:poll_query).
291
+ with(time_from: time_value(mins: -61),
292
+ time_to: time_value(secs: -2),
293
+ column_name: :updated_at,
294
+ min_id: 0)
295
+
296
+ info = Deimos::PollInfo.last
297
+ expect(info.last_sent.in_time_zone).to eq(time_value(mins: -61, secs: 30))
298
+ expect(info.last_sent_id).to eq(widgets[2].id)
299
+
300
+ travel 61.seconds
301
+ # retry the batch that failed, then process the remaining widget plus the one which came in during the delay
302
+ expect(poller).to receive(:process_batch).ordered.
303
+ with([widgets[3], widgets[4], widgets[5]]).and_call_original
304
+ expect(poller).to receive(:process_batch).with([widgets[6], last_widget]).
305
+ and_call_original
306
+ poller.process_updates
307
+ expect(MyProducer).to have_received(:poll_query).
308
+ with(time_from: time_value(mins: -61, secs: 30),
309
+ time_to: time_value(secs: 59),
310
+ column_name: :updated_at,
311
+ min_id: widgets[2].id)
312
+
313
+ expect(info.reload.last_sent.in_time_zone).to eq(time_value(secs: -1))
314
+ expect(info.last_sent_id).to eq(last_widget.id)
315
+ end
316
+
317
+ end
318
+
319
+ end
320
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Deimos::Utils::DeadlockRetry do
4
+ include_context 'with widgets'
5
+
6
+ before(:each) do
7
+ allow(described_class).to receive(:sleep)
8
+ end
9
+
10
+ describe 'deadlock handling' do
11
+ let(:batch) { [{ key: 1, payload: { test_id: 'abc', some_int: 3 } }] }
12
+
13
+ it 'should retry deadlocks 3 times' do
14
+ # Should receive original attempt + 2 retries
15
+ expect(Widget).
16
+ to receive(:create).
17
+ and_raise(ActiveRecord::Deadlocked.new('Lock wait timeout exceeded')).
18
+ exactly(3).times
19
+
20
+ # After 3 tries, should let it bubble up
21
+ expect {
22
+ described_class.wrap do
23
+ Widget.create(test_id: 'abc')
24
+ end
25
+ }.to raise_error(ActiveRecord::Deadlocked)
26
+ end
27
+
28
+ it 'should stop retrying deadlocks after success' do
29
+ allow(Widget).
30
+ to receive(:create).
31
+ with(hash_including(test_id: 'first')).
32
+ and_call_original
33
+
34
+ # Fail on first attempt, succeed on second
35
+ expect(Widget).
36
+ to receive(:create).
37
+ with(hash_including(test_id: 'second')).
38
+ and_raise(ActiveRecord::Deadlocked.new('Deadlock found when trying to get lock')).
39
+ once.
40
+ ordered
41
+
42
+ expect(Widget).
43
+ to receive(:create).
44
+ with(hash_including(test_id: 'second')).
45
+ once.
46
+ ordered.
47
+ and_call_original
48
+
49
+ # Should not raise anything
50
+ described_class.wrap do
51
+ Widget.create(test_id: 'first')
52
+ Widget.create(test_id: 'second')
53
+ end
54
+
55
+ expect(Widget.all).to match_array([
56
+ have_attributes(test_id: 'first'),
57
+ have_attributes(test_id: 'second')
58
+ ])
59
+ end
60
+
61
+ it 'should not retry non-deadlock exceptions' do
62
+ expect(Widget).
63
+ to receive(:create).
64
+ and_raise(ActiveRecord::StatementInvalid.new('Oops!!')).
65
+ once
66
+
67
+ expect {
68
+ described_class.wrap do
69
+ Widget.create(test_id: 'abc')
70
+ end
71
+ }.to raise_error(ActiveRecord::StatementInvalid, 'Oops!!')
72
+ end
73
+ end
74
+ end
@@ -2,8 +2,11 @@
2
2
 
3
3
  describe Deimos::Utils::LagReporter do
4
4
 
5
+ let(:kafka_client) { instance_double(Kafka::Client) }
6
+ let(:partition1_tags) { %w(consumer_group:group1 partition:1 topic:my-topic) }
7
+ let(:partition2_tags) { %w(consumer_group:group1 partition:2 topic:my-topic) }
8
+
5
9
  before(:each) do
6
- kafka_client = instance_double(Kafka::Client)
7
10
  allow(kafka_client).to receive(:last_offset_for).and_return(100)
8
11
  allow(Phobos).to receive(:create_kafka_client).and_return(kafka_client)
9
12
  Deimos.configure { |c| c.consumers.report_lag = true }
@@ -20,38 +23,22 @@ describe Deimos::Utils::LagReporter do
20
23
  'heartbeat.consumer.kafka',
21
24
  group_id: 'group1', topic_partitions: { 'my-topic': [1] }
22
25
  )
23
-
24
26
  end
25
27
 
26
28
  it 'should report lag' do
27
29
  expect(Deimos.config.metrics).to receive(:gauge).ordered.twice.
28
- with('consumer_lag', 95,
29
- tags: %w(
30
- consumer_group:group1
31
- partition:1
32
- topic:my-topic
33
- ))
30
+ with('consumer_lag', 95, tags: partition1_tags)
34
31
  expect(Deimos.config.metrics).to receive(:gauge).ordered.once.
35
- with('consumer_lag', 80,
36
- tags: %w(
37
- consumer_group:group1
38
- partition:2
39
- topic:my-topic
40
- ))
32
+ with('consumer_lag', 80, tags: partition2_tags)
41
33
  expect(Deimos.config.metrics).to receive(:gauge).ordered.once.
42
- with('consumer_lag', 0,
43
- tags: %w(
44
- consumer_group:group1
45
- partition:2
46
- topic:my-topic
47
- ))
34
+ with('consumer_lag', 0, tags: partition2_tags)
48
35
  ActiveSupport::Notifications.instrument(
49
36
  'seek.consumer.kafka',
50
37
  offset: 5, topic: 'my-topic', group_id: 'group1', partition: 1
51
38
  )
52
39
  ActiveSupport::Notifications.instrument(
53
40
  'start_process_message.consumer.kafka',
54
- offset_lag: 80, topic: 'my-topic', group_id: 'group1', partition: 2
41
+ offset: 20, topic: 'my-topic', group_id: 'group1', partition: 2
55
42
  )
56
43
  ActiveSupport::Notifications.instrument(
57
44
  'heartbeat.consumer.kafka',
@@ -59,8 +46,28 @@ describe Deimos::Utils::LagReporter do
59
46
  )
60
47
  ActiveSupport::Notifications.instrument(
61
48
  'start_process_batch.consumer.kafka',
62
- offset_lag: 0, topic: 'my-topic', group_id: 'group1', partition: 2
49
+ last_offset: 100, topic: 'my-topic', group_id: 'group1', partition: 2
50
+ )
51
+ ActiveSupport::Notifications.instrument(
52
+ 'heartbeat.consumer.kafka',
53
+ group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] }
54
+ )
55
+ end
56
+
57
+ it 'should update lag after heartbeat' do
58
+ expect(Deimos.config.metrics).to receive(:gauge).ordered.once.
59
+ with('consumer_lag', 94, tags: partition2_tags)
60
+ expect(Deimos.config.metrics).to receive(:gauge).ordered.once.
61
+ with('consumer_lag', 95, tags: partition2_tags)
62
+ ActiveSupport::Notifications.instrument(
63
+ 'seek.consumer.kafka',
64
+ offset: 6, topic: 'my-topic', group_id: 'group1', partition: 2
65
+ )
66
+ ActiveSupport::Notifications.instrument(
67
+ 'heartbeat.consumer.kafka',
68
+ group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] }
63
69
  )
70
+ allow(kafka_client).to receive(:last_offset_for).and_return(101)
64
71
  ActiveSupport::Notifications.instrument(
65
72
  'heartbeat.consumer.kafka',
66
73
  group_id: 'group1', topic_partitions: { 'my-topic': [1, 2] }