deimos-ruby 1.6.4 → 1.7.0.pre.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a015ca37d59e16b4356d9f51e376a4a98a051fdf42d5331ab9f61c6bd6014da6
4
- data.tar.gz: 7f38827e24bca8ea50159b4810d7eb08dec6ffd29feb488c4f9e43516b55a369
3
+ metadata.gz: ab8ac284db2c98dac5624caf5bf75118ad89fb9ec6e1f3109f15373f2bf4c8be
4
+ data.tar.gz: 7d26a7d8d163ab4783638c9393bea2d1a7c8f364a7eed2ea4cb699cbcbafd244
5
5
  SHA512:
6
- metadata.gz: '08a2f52f66816717d388be2a888979d4a55d0bb77af21c0f5e23ec5717449630ff8552fc060efb58cd7f999c5c5957de38d7fdb024f1a7e3076f8a44f6687430'
7
- data.tar.gz: fb6112cec7a2e072332960b5d546b1b90ee92376dc73e348b3cf3ed459a4a9814857d19ee59799b0011bef2f08dbc7f89bc6178cd55ed268cd67c45dabd2afe1
6
+ metadata.gz: 2610223a8d8c2546dad4037e4d2b1845e77372b304f4c072c269316bceed4a6626bd36d3541561d91e1c68a06ed891c051feb16a1800763cf805061c05cadc58
7
+ data.tar.gz: 3916fa546b45182b987b0409d51bcce96c5a1beee449722861819f76377258e7ccb5700f50d2c0d8d1791986ed4f5d19b7c989cdcdbe77d1289e8531eb24ba15
data/.circleci/config.yml CHANGED
@@ -20,6 +20,9 @@ jobs:
20
20
  # Bundle install dependencies in /tmp/
21
21
  # so Dockerfile does not copy them since
22
22
  # its base image is different than CircleCI
23
+ - run:
24
+ name: Install bundler
25
+ command: gem install bundler:2.1.4
23
26
  - run:
24
27
  name: Bundle install
25
28
  command: bundle install --path vendor/bundle --jobs=4 --retry=3
@@ -40,6 +43,9 @@ jobs:
40
43
  steps:
41
44
  - attach_workspace:
42
45
  at: ~/workspace
46
+ - run:
47
+ name: Install bundler
48
+ command: gem install bundler:2.1.4
43
49
  - run:
44
50
  name: Point bundle to vendor/bundle
45
51
  command: bundle --path vendor/bundle
@@ -50,6 +56,9 @@ jobs:
50
56
  steps:
51
57
  - attach_workspace:
52
58
  at: ~/workspace
59
+ - run:
60
+ name: Install bundler
61
+ command: gem install bundler:2.1.4
53
62
  - run:
54
63
  name: Point bundle to vendor/bundle
55
64
  command: bundle --path vendor/bundle
data/.rubocop.yml CHANGED
@@ -1,10 +1,11 @@
1
1
  require: rubocop-rspec
2
2
 
3
3
  AllCops:
4
- TargetRubyVersion: 2.3
4
+ TargetRubyVersion: 2.4
5
5
  Exclude:
6
6
  - lib/deimos/monkey_patches/*.rb
7
7
  - vendor/**/*
8
+ NewCops: enable
8
9
 
9
10
  # class Plumbus
10
11
  # private
@@ -34,6 +35,12 @@ Layout/DotPosition:
34
35
  Layout/EmptyLinesAroundBlockBody:
35
36
  Enabled: false
36
37
 
38
+ Layout/LineLength:
39
+ Max: 100
40
+ Severity: refactor
41
+ Exclude:
42
+ - 'spec/**/*'
43
+
37
44
  # foo = if expression
38
45
  # 'bar'
39
46
  # end
@@ -82,12 +89,6 @@ Metrics/CyclomaticComplexity:
82
89
  Severity: refactor
83
90
  Max: 20
84
91
 
85
- Metrics/LineLength:
86
- Max: 100
87
- Severity: refactor
88
- Exclude:
89
- - 'spec/**/*'
90
-
91
92
  Metrics/MethodLength:
92
93
  Severity: refactor
93
94
  Max: 30
@@ -123,12 +124,6 @@ Style/BlockDelimiters:
123
124
  # some_method(x, y, {a: 1, b: 2})
124
125
  # some_method(x, y, {a: 1, b: 2}, a: 1, b: 2)
125
126
 
126
- # good
127
- # some_method(x, y, a: 1, b: 2)
128
- # some_method(x, y, {a: 1, b: 2}, {a: 1, b: 2})
129
- Style/BracesAroundHashParameters:
130
- EnforcedStyle: context_dependent
131
-
132
127
  # Enable both this:
133
128
  # MyModule::MyClass
134
129
  # and this:
@@ -179,6 +174,13 @@ Style/GuardClause:
179
174
  Style/HashSyntax:
180
175
  EnforcedStyle: ruby19_no_mixed_keys
181
176
 
177
+ # We are still unofficially targeting Ruby 2.3
178
+ Style/HashTransformKeys:
179
+ Enabled: false
180
+
181
+ Style/HashTransformValues:
182
+ Enabled: false
183
+
182
184
  Style/IfUnlessModifier:
183
185
  Enabled: false
184
186
 
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.5.1
1
+ 2.5.3
data/CHANGELOG.md CHANGED
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## UNRELEASED
9
9
 
10
+ ### Features :star:
11
+ - Added the DB Poller feature / process.
12
+
10
13
  ## 1.6.4 - 2020-05-11
11
14
  - Fixed the payload logging fix for errored messages as well.
12
15
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- deimos-ruby (1.6.4)
4
+ deimos-ruby (1.7.0.pre.beta1)
5
5
  avro_turf (~> 0.11)
6
6
  phobos (~> 1.9)
7
7
  ruby-kafka (~> 0.7)
@@ -41,7 +41,7 @@ GEM
41
41
  activemodel (= 5.2.4.2)
42
42
  activesupport (= 5.2.4.2)
43
43
  arel (>= 9.0)
44
- activerecord-import (1.0.3)
44
+ activerecord-import (1.0.4)
45
45
  activerecord (>= 3.2)
46
46
  activestorage (5.2.4.2)
47
47
  actionpack (= 5.2.4.2)
@@ -54,30 +54,30 @@ GEM
54
54
  tzinfo (~> 1.1)
55
55
  arel (9.0.0)
56
56
  ast (2.4.0)
57
- avro (1.9.1)
57
+ avro (1.9.2)
58
58
  multi_json
59
59
  avro_turf (0.11.0)
60
60
  avro (>= 1.7.7, < 1.10)
61
61
  excon (~> 0.45)
62
62
  builder (3.2.4)
63
63
  coderay (1.1.2)
64
- concurrent-ruby (1.1.5)
65
- concurrent-ruby-ext (1.1.5)
66
- concurrent-ruby (= 1.1.5)
64
+ concurrent-ruby (1.1.6)
65
+ concurrent-ruby-ext (1.1.6)
66
+ concurrent-ruby (= 1.1.6)
67
67
  crass (1.0.6)
68
- ddtrace (0.30.0)
68
+ ddtrace (0.35.1)
69
69
  msgpack
70
70
  diff-lcs (1.3)
71
71
  digest-crc (0.5.1)
72
- dogstatsd-ruby (4.5.0)
72
+ dogstatsd-ruby (4.8.0)
73
73
  erubi (1.9.0)
74
74
  excon (0.73.0)
75
75
  exponential-backoff (0.0.4)
76
- ffi (1.11.3)
76
+ ffi (1.12.2)
77
77
  formatador (0.2.5)
78
78
  globalid (0.4.2)
79
79
  activesupport (>= 4.2.0)
80
- guard (2.16.1)
80
+ guard (2.16.2)
81
81
  formatador (>= 0.2.4)
82
82
  listen (>= 2.7, < 4.0)
83
83
  lumberjack (>= 1.0.12, < 2.0)
@@ -107,17 +107,17 @@ GEM
107
107
  loofah (2.5.0)
108
108
  crass (~> 1.0.2)
109
109
  nokogiri (>= 1.5.9)
110
- lumberjack (1.0.13)
110
+ lumberjack (1.2.4)
111
111
  mail (2.7.1)
112
112
  mini_mime (>= 0.1.1)
113
113
  marcel (0.3.3)
114
114
  mimemagic (~> 0.3.2)
115
- method_source (0.9.2)
116
- mimemagic (0.3.4)
115
+ method_source (1.0.0)
116
+ mimemagic (0.3.5)
117
117
  mini_mime (1.0.2)
118
118
  mini_portile2 (2.4.0)
119
119
  minitest (5.14.0)
120
- msgpack (1.3.1)
120
+ msgpack (1.3.3)
121
121
  multi_json (1.14.1)
122
122
  mysql2 (0.5.3)
123
123
  nenv (0.3.0)
@@ -128,9 +128,9 @@ GEM
128
128
  nenv (~> 0.1)
129
129
  shellany (~> 0.0)
130
130
  parallel (1.19.1)
131
- parser (2.6.5.0)
131
+ parser (2.7.1.2)
132
132
  ast (~> 2.4.0)
133
- pg (1.1.4)
133
+ pg (1.2.3)
134
134
  phobos (1.9.0)
135
135
  activesupport (>= 3.0.0)
136
136
  concurrent-ruby (>= 1.0.2)
@@ -139,9 +139,9 @@ GEM
139
139
  logging
140
140
  ruby-kafka
141
141
  thor
142
- pry (0.12.2)
143
- coderay (~> 1.1.0)
144
- method_source (~> 0.9.0)
142
+ pry (0.13.1)
143
+ coderay (~> 1.1)
144
+ method_source (~> 1.0)
145
145
  rack (2.2.2)
146
146
  rack-test (1.1.0)
147
147
  rack (>= 1.0, < 3)
@@ -171,32 +171,34 @@ GEM
171
171
  thor (>= 0.19.0, < 2.0)
172
172
  rainbow (3.0.0)
173
173
  rake (13.0.1)
174
- rb-fsevent (0.10.3)
175
- rb-inotify (0.10.0)
174
+ rb-fsevent (0.10.4)
175
+ rb-inotify (0.10.1)
176
176
  ffi (~> 1.0)
177
+ rexml (3.2.4)
177
178
  rspec (3.9.0)
178
179
  rspec-core (~> 3.9.0)
179
180
  rspec-expectations (~> 3.9.0)
180
181
  rspec-mocks (~> 3.9.0)
181
- rspec-core (3.9.0)
182
- rspec-support (~> 3.9.0)
183
- rspec-expectations (3.9.0)
182
+ rspec-core (3.9.2)
183
+ rspec-support (~> 3.9.3)
184
+ rspec-expectations (3.9.1)
184
185
  diff-lcs (>= 1.2.0, < 2.0)
185
186
  rspec-support (~> 3.9.0)
186
- rspec-mocks (3.9.0)
187
+ rspec-mocks (3.9.1)
187
188
  diff-lcs (>= 1.2.0, < 2.0)
188
189
  rspec-support (~> 3.9.0)
189
- rspec-support (3.9.0)
190
+ rspec-support (3.9.3)
190
191
  rspec_junit_formatter (0.4.1)
191
192
  rspec-core (>= 2, < 4, != 2.12.0)
192
- rubocop (0.77.0)
193
+ rubocop (0.82.0)
193
194
  jaro_winkler (~> 1.5.1)
194
195
  parallel (~> 1.10)
195
- parser (>= 2.6)
196
+ parser (>= 2.7.0.1)
196
197
  rainbow (>= 2.2.2, < 4.0)
198
+ rexml
197
199
  ruby-progressbar (~> 1.7)
198
- unicode-display_width (>= 1.4.0, < 1.7)
199
- rubocop-rspec (1.37.1)
200
+ unicode-display_width (>= 1.4.0, < 2.0)
201
+ rubocop-rspec (1.39.0)
200
202
  rubocop (>= 0.68.1)
201
203
  ruby-kafka (0.7.10)
202
204
  digest-crc
@@ -209,12 +211,12 @@ GEM
209
211
  actionpack (>= 4.0)
210
212
  activesupport (>= 4.0)
211
213
  sprockets (>= 3.0.0)
212
- sqlite3 (1.4.1)
214
+ sqlite3 (1.4.2)
213
215
  thor (1.0.1)
214
216
  thread_safe (0.3.6)
215
217
  tzinfo (1.2.7)
216
218
  thread_safe (~> 0.1)
217
- unicode-display_width (1.6.0)
219
+ unicode-display_width (1.7.0)
218
220
  websocket-driver (0.7.1)
219
221
  websocket-extensions (>= 0.1.0)
220
222
  websocket-extensions (0.1.4)
@@ -226,7 +228,6 @@ DEPENDENCIES
226
228
  activerecord (~> 5.2)
227
229
  activerecord-import
228
230
  avro (~> 1.9)
229
- bundler (~> 1)
230
231
  ddtrace (~> 0.11)
231
232
  deimos-ruby!
232
233
  dogstatsd-ruby (~> 4.2)
@@ -244,4 +245,4 @@ DEPENDENCIES
244
245
  sqlite3 (~> 1.3)
245
246
 
246
247
  BUNDLED WITH
247
- 1.17.3
248
+ 2.1.4
data/README.md CHANGED
@@ -23,6 +23,7 @@ Built on Phobos and hence Ruby-Kafka.
23
23
  * [Consumers](#consumers)
24
24
  * [Rails Integration](#rails-integration)
25
25
  * [Database Backend](#database-backend)
26
+ * [Database Poller](#database-poller)
26
27
  * [Running Consumers](#running-consumers)
27
28
  * [Metrics](#metrics)
28
29
  * [Testing](#testing)
@@ -557,6 +558,75 @@ class MyConsumer < Deimos::ActiveRecordConsumer
557
558
  end
558
559
  ```
559
560
 
561
+ ## Database Poller
562
+
563
+ Another method of fetching updates from the database to Kafka is by polling
564
+ the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)).
565
+ Deimos provides a database poller, which allows you to use the same pattern but
566
+ with all the flexibility of real Ruby code, and the added advantage of having
567
+ a single consistent framework to talk to Kafka.
568
+
569
+ One of the disadvantages of polling the database is that it can't detect deletions.
570
+ You can get around this by configuring a mixin to send messages *only* on deletion,
571
+ and use the poller to handle all other updates. You can reuse the same producer
572
+ for both cases to handle joins, changes/mappings, business logic, etc.
573
+
574
+ To enable the poller, generate the migration:
575
+
576
+ ```bash
577
+ rails g deimos:db_poller
578
+ ```
579
+
580
+ Run the migration:
581
+
582
+ ```bash
583
+ rails db:migrate
584
+ ```
585
+
586
+ Add the following configuration:
587
+
588
+ ```ruby
589
+ Deimos.configure do
590
+ db_poller do
591
+ producer_class 'MyProducer' # an ActiveRecordProducer
592
+ end
593
+ db_poller do
594
+ producer_class 'MyOtherProducer'
595
+ run_every 2.minutes
596
+ delay_time 5.seconds # to allow for transactions to finish
597
+ full_table true # if set, dump the entire table every run; use for small tables
598
+ end
599
+ end
600
+ ```
601
+
602
+ All the information around connecting and querying the database lives in the
603
+ producer itself, so you don't need to write any additional code. You can
604
+ define one additional method on the producer:
605
+
606
+ ```ruby
607
+ class MyProducer < Deimos::ActiveRecordProducer
608
+ ...
609
+ def poll_query(time_from:, time_to:, column_name:, min_id:)
610
+ # Default is to use the timestamp `column_name` to find all records
611
+ # between time_from and time_to, or records where `column_name` is equal to
612
+ # `time_from` but its ID is greater than `min_id`. This is called
613
+ # successively as the DB is polled to ensure even if a batch ends in the
614
+ # middle of a timestamp, we won't miss any records.
615
+ # You can override or change this behavior if necessary.
616
+ end
617
+ end
618
+ ```
619
+
620
+ To run the DB poller:
621
+
622
+ rake deimos:db_poller
623
+
624
+ Note that the DB poller creates one thread per configured poller, and is
625
+ currently designed *not* to be scaled out - i.e. it assumes you will only
626
+ have one process running at a time. If a particular poll takes longer than
627
+ the poll interval (e.g. the interval is set to 1 minute but the poll takes 75 seconds),
628
+ the next poll will begin immediately following the first one completing.
629
+
560
630
  ## Running consumers
561
631
 
562
632
  Deimos includes a rake task. Once it's in your gemfile, just run
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ begin
6
6
 
7
7
  RSpec::Core::RakeTask.new(:spec)
8
8
  task(default: :spec)
9
- rescue LoadError # rubocop:disable Lint/SuppressedException
9
+ rescue LoadError
10
10
  # no rspec available
11
11
  end
12
12
 
data/deimos-ruby.gemspec CHANGED
@@ -25,7 +25,6 @@ Gem::Specification.new do |spec|
25
25
  spec.add_development_dependency('activerecord', '~> 5.2')
26
26
  spec.add_development_dependency('activerecord-import')
27
27
  spec.add_development_dependency('avro', '~> 1.9')
28
- spec.add_development_dependency('bundler', '~> 1')
29
28
  spec.add_development_dependency('ddtrace', '~> 0.11')
30
29
  spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
31
30
  spec.add_development_dependency('guard', '~> 2')
@@ -89,6 +89,29 @@ offset_commit_threshold|0|Number of messages that can be processed before their
89
89
  heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
90
90
  backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
91
91
 
92
+ ## Defining Database Pollers
93
+
94
+ These are used when polling the database via `rake deimos:db_poller`. You
95
+ can create a number of pollers, one per topic.
96
+
97
+ ```ruby
98
+ Deimos.configure do
99
+ db_poller do
100
+ producer_class 'MyProducer'
101
+ run_every 2.minutes
102
+ end
103
+ end
104
+ ```
105
+
106
+ Config name|Default|Description
107
+ -----------|-------|-----------
108
+ producer_class|nil|ActiveRecordProducer class to use for sending messages.
109
+ run_every|60|Amount of time in seconds to wait between runs.
110
+ timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
111
+ delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
112
+ full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
113
+ start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
114
+
92
115
  ## Kafka Configuration
93
116
 
94
117
  Config name|Default|Description
@@ -59,6 +59,29 @@ module Deimos
59
59
  k.to_sym != :payload_key && !fields.map(&:name).include?(k)
60
60
  end
61
61
  end
62
+
63
+ # Query to use when polling the database with the DbPoller. Add
64
+ # includes, joins, or wheres as necessary, or replace entirely.
65
+ # @param time_from [Time] lower bound of the query (exclusive, apart from the min_id tie-break).
66
+ # @param time_to [Time] upper bound of the query (inclusive).
67
+ # @param column_name [Symbol] the timestamp column to compare against.
68
+ # @param min_id [Numeric] tie-breaker: rows whose timestamp equals time_from
69
+ # are only returned when their primary key is greater than this value.
70
+ # @return [ActiveRecord::Relation] relation on config[:record_class] restricted to that range.
71
+ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
72
+ klass = config[:record_class]
73
+ table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
74
+ column = ActiveRecord::Base.connection.quote_column_name(column_name)
75
+ primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
76
+ klass.where(
77
+ "((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
78
+ OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
79
+ time_from,
80
+ min_id,
81
+ time_from,
82
+ time_to
83
+ )
84
+ end
62
85
  end
63
86
  end
64
87
  end
@@ -340,6 +340,26 @@ module Deimos
340
340
  setting :heartbeat_interval
341
341
  end
342
342
 
343
+ setting_object :db_poller do
344
+ # Name of the producer class to use for the poller (must be an ActiveRecordProducer).
345
+ setting :producer_class
346
+ # How often to run the poller, in seconds. If a poll takes longer than this
347
+ # time, the next poll will run again immediately and the timeout
348
+ # will be pushed out to the next interval (e.g. 1 minute).
349
+ setting :run_every, 60
350
+ # Column to use to find updates. Must have an index on it.
351
+ setting :timestamp_column, :updated_at
352
+ # Amount of time, in seconds, to wait before catching updates, to allow transactions
353
+ # to complete but still pick up the right records.
354
+ setting :delay_time, 2
355
+ # If true, dump the full table rather than incremental changes. Should
356
+ # only be used for very small tables.
357
+ setting :full_table, false
358
+ # If false, start from the current time instead of the beginning of time
359
+ # if this is the first time running the poller.
360
+ setting :start_from_beginning, true
361
+ end
362
+
343
363
  deprecate 'kafka_logger', 'kafka.logger'
344
364
  deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
345
365
  deprecate 'schema_registry_url', 'schema.registry_url'
@@ -14,7 +14,7 @@ module Deimos
14
14
  # Try to create it - it's fine if it already exists
15
15
  begin
16
16
  self.create(topic: topic)
17
- rescue ActiveRecord::RecordNotUnique # rubocop:disable Lint/SuppressedException
17
+ rescue ActiveRecord::RecordNotUnique
18
18
  # continue on
19
19
  end
20
20
 
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Lint/UnusedMethodArgument
4
3
  module Deimos
5
4
  module Metrics
6
5
  # Base class for all metrics providers.
@@ -35,4 +34,3 @@ module Deimos
35
34
  end
36
35
  end
37
36
  end
38
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Deimos
4
+ # ActiveRecord class to record the last time we polled the database.
5
+ # For use with DbPoller. Backed by the `deimos_poll_info` table.
6
+ class PollInfo < ActiveRecord::Base
7
+ self.table_name = 'deimos_poll_info'
8
+ end
9
+ end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Lint/UnusedMethodArgument
4
3
  module Deimos
5
4
  module Tracing
6
5
  # Base class for all tracing providers.
@@ -28,4 +27,3 @@ module Deimos
28
27
  end
29
28
  end
30
29
  end
31
- # rubocop:enable Lint/UnusedMethodArgument
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deimos/poll_info'
4
+ require 'deimos/utils/executor'
5
+ require 'deimos/utils/signal_handler'
6
+
7
+ module Deimos
8
+ module Utils
9
+ # Class which continually polls the database and sends Kafka messages.
10
+ class DbPoller
11
+ BATCH_SIZE = 1000
12
+
13
+ # Needed for Executor so it can identify the worker
14
+ attr_reader :id
15
+
16
+ # Begin the DB Poller process.
17
+ def self.start!
18
+ if Deimos.config.db_poller_objects.empty?
19
+ raise('No pollers configured!')
20
+ end
21
+
22
+ pollers = Deimos.config.db_poller_objects.map do |poller_config|
23
+ self.new(poller_config)
24
+ end
25
+ executor = Deimos::Utils::Executor.new(pollers,
26
+ sleep_seconds: 5,
27
+ logger: Deimos.config.logger)
28
+ signal_handler = Deimos::Utils::SignalHandler.new(executor)
29
+ signal_handler.run!
30
+ end
31
+
32
+ # @param config [Deimos::Configuration::ConfigStruct]
33
+ def initialize(config)
34
+ @config = config
35
+ @id = SecureRandom.hex
36
+ begin
37
+ @producer = @config.producer_class.constantize
38
+ rescue NameError
39
+ raise "Class #{@config.producer_class} not found!"
40
+ end
41
+ unless @producer < Deimos::ActiveRecordProducer
42
+ raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
43
+ end
44
+ end
45
+
46
+ # Start the poll:
47
+ # 1) Grab the current PollInfo from the database indicating the last
48
+ # time we ran
49
+ # 2) On a loop, process all the recent updates between the last time
50
+ # we ran and now.
51
+ def start
52
+ # Don't send asynchronously
53
+ if Deimos.config.producers.backend == :kafka_async
54
+ Deimos.config.producers.backend = :kafka
55
+ end
56
+ Deimos.config.logger.info('Starting...')
57
+ @signal_to_stop = false
58
+ retrieve_poll_info
59
+ loop do
60
+ if @signal_to_stop
61
+ Deimos.config.logger.info('Shutting down')
62
+ break
63
+ end
64
+ process_updates
65
+ sleep 0.1
66
+ end
67
+ end
68
+
69
+ # Grab the PollInfo or create if it doesn't exist.
70
+ def retrieve_poll_info
71
+ ActiveRecord::Base.connection.reconnect!
72
+ new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
73
+ @info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
74
+ Deimos::PollInfo.create!(producer: @config.producer_class,
75
+ last_sent: new_time,
76
+ last_sent_id: 0)
77
+ end
78
+
79
+ # Stop the poll.
80
+ def stop
81
+ Deimos.config.logger.info('Received signal to stop')
82
+ @signal_to_stop = true
83
+ end
84
+
85
+ # Indicate whether this current loop should process updates. Most loops
86
+ # will busy-wait (sleeping 0.1 seconds) until it's ready.
87
+ # @return [Boolean]
88
+ def should_run?
89
+ Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
90
+ end
91
+
92
+ # @param record [ActiveRecord::Base]
93
+ # @return [ActiveSupport::TimeWithZone]
94
+ def last_updated(record)
95
+ record.public_send(@config.timestamp_column)
96
+ end
97
+
98
+ # Send messages for updated data.
99
+ def process_updates
100
+ return unless should_run?
101
+
102
+ time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
103
+ time_to = Time.zone.now - @config.delay_time
104
+ Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
105
+ message_count = 0
106
+ batch_count = 0
107
+
108
+ # poll_query gets all the relevant data from the database, as defined
109
+ # by the producer itself.
110
+ loop do
111
+ Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
112
+ batch = fetch_results(time_from, time_to).to_a
113
+ break if batch.empty?
114
+
115
+ batch_count += 1
116
+ process_batch(batch)
117
+ message_count += batch.size
118
+ time_from = last_updated(batch.last)
119
+ end
120
+ Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches}")
121
+ end
122
+
123
+ # @param time_from [ActiveSupport::TimeWithZone]
124
+ # @param time_to [ActiveSupport::TimeWithZone]
125
+ # @return [ActiveRecord::Relation]
126
+ def fetch_results(time_from, time_to)
127
+ id = @producer.config[:record_class].primary_key
128
+ quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
129
+ quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
130
+ @producer.poll_query(time_from: time_from,
131
+ time_to: time_to,
132
+ column_name: @config.timestamp_column,
133
+ min_id: @info.last_sent_id).
134
+ limit(BATCH_SIZE).
135
+ order("#{quoted_timestamp}, #{quoted_id}")
136
+ end
137
+
138
+ # @param batch [Array<ActiveRecord::Base>]
139
+ def process_batch(batch)
140
+ record = batch.last
141
+ id_method = record.class.primary_key
142
+ last_id = record.public_send(id_method)
143
+ last_updated_at = last_updated(record)
144
+ @producer.send_events(batch)
145
+ @info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
146
+ end
147
+ end
148
+ end
149
+ end