deimos-ruby 1.6.4 → 1.7.0.pre.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +9 -0
- data/.rubocop.yml +15 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +35 -34
- data/README.md +70 -0
- data/Rakefile +1 -1
- data/deimos-ruby.gemspec +0 -1
- data/docs/CONFIGURATION.md +23 -0
- data/lib/deimos/active_record_producer.rb +23 -0
- data/lib/deimos/config/configuration.rb +20 -0
- data/lib/deimos/kafka_topic_info.rb +1 -1
- data/lib/deimos/metrics/provider.rb +0 -2
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/tracing/provider.rb +0 -2
- data/lib/deimos/utils/db_poller.rb +149 -0
- data/lib/deimos/utils/db_producer.rb +2 -1
- data/lib/deimos/utils/executor.rb +1 -1
- data/lib/deimos/version.rb +1 -1
- data/lib/deimos.rb +1 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +7 -0
- data/spec/active_record_producer_spec.rb +66 -88
- data/spec/consumer_spec.rb +2 -2
- data/spec/producer_spec.rb +3 -3
- data/spec/rake_spec.rb +1 -1
- data/spec/spec_helper.rb +44 -6
- data/spec/utils/db_poller_spec.rb +320 -0
- metadata +13 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab8ac284db2c98dac5624caf5bf75118ad89fb9ec6e1f3109f15373f2bf4c8be
|
4
|
+
data.tar.gz: 7d26a7d8d163ab4783638c9393bea2d1a7c8f364a7eed2ea4cb699cbcbafd244
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2610223a8d8c2546dad4037e4d2b1845e77372b304f4c072c269316bceed4a6626bd36d3541561d91e1c68a06ed891c051feb16a1800763cf805061c05cadc58
|
7
|
+
data.tar.gz: 3916fa546b45182b987b0409d51bcce96c5a1beee449722861819f76377258e7ccb5700f50d2c0d8d1791986ed4f5d19b7c989cdcdbe77d1289e8531eb24ba15
|
data/.circleci/config.yml
CHANGED
@@ -20,6 +20,9 @@ jobs:
|
|
20
20
|
# Bundle install dependencies in /tmp/
|
21
21
|
# so Dockerfile does not copy them since
|
22
22
|
# its base image is different than CircleCI
|
23
|
+
- run:
|
24
|
+
name: Install bundler
|
25
|
+
command: gem install bundler:2.1.4
|
23
26
|
- run:
|
24
27
|
name: Bundle install
|
25
28
|
command: bundle install --path vendor/bundle --jobs=4 --retry=3
|
@@ -40,6 +43,9 @@ jobs:
|
|
40
43
|
steps:
|
41
44
|
- attach_workspace:
|
42
45
|
at: ~/workspace
|
46
|
+
- run:
|
47
|
+
name: Install bundler
|
48
|
+
command: gem install bundler:2.1.4
|
43
49
|
- run:
|
44
50
|
name: Point bundle to vendor/bundle
|
45
51
|
command: bundle --path vendor/bundle
|
@@ -50,6 +56,9 @@ jobs:
|
|
50
56
|
steps:
|
51
57
|
- attach_workspace:
|
52
58
|
at: ~/workspace
|
59
|
+
- run:
|
60
|
+
name: Install bundler
|
61
|
+
command: gem install bundler:2.1.4
|
53
62
|
- run:
|
54
63
|
name: Point bundle to vendor/bundle
|
55
64
|
command: bundle --path vendor/bundle
|
data/.rubocop.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require: rubocop-rspec
|
2
2
|
|
3
3
|
AllCops:
|
4
|
-
TargetRubyVersion: 2.
|
4
|
+
TargetRubyVersion: 2.4
|
5
5
|
Exclude:
|
6
6
|
- lib/deimos/monkey_patches/*.rb
|
7
7
|
- vendor/**/*
|
8
|
+
NewCops: enable
|
8
9
|
|
9
10
|
# class Plumbus
|
10
11
|
# private
|
@@ -34,6 +35,12 @@ Layout/DotPosition:
|
|
34
35
|
Layout/EmptyLinesAroundBlockBody:
|
35
36
|
Enabled: false
|
36
37
|
|
38
|
+
Layout/LineLength:
|
39
|
+
Max: 100
|
40
|
+
Severity: refactor
|
41
|
+
Exclude:
|
42
|
+
- 'spec/**/*'
|
43
|
+
|
37
44
|
# foo = if expression
|
38
45
|
# 'bar'
|
39
46
|
# end
|
@@ -82,12 +89,6 @@ Metrics/CyclomaticComplexity:
|
|
82
89
|
Severity: refactor
|
83
90
|
Max: 20
|
84
91
|
|
85
|
-
Metrics/LineLength:
|
86
|
-
Max: 100
|
87
|
-
Severity: refactor
|
88
|
-
Exclude:
|
89
|
-
- 'spec/**/*'
|
90
|
-
|
91
92
|
Metrics/MethodLength:
|
92
93
|
Severity: refactor
|
93
94
|
Max: 30
|
@@ -123,12 +124,6 @@ Style/BlockDelimiters:
|
|
123
124
|
# some_method(x, y, {a: 1, b: 2})
|
124
125
|
# some_method(x, y, {a: 1, b: 2}, a: 1, b: 2)
|
125
126
|
|
126
|
-
# good
|
127
|
-
# some_method(x, y, a: 1, b: 2)
|
128
|
-
# some_method(x, y, {a: 1, b: 2}, {a: 1, b: 2})
|
129
|
-
Style/BracesAroundHashParameters:
|
130
|
-
EnforcedStyle: context_dependent
|
131
|
-
|
132
127
|
# Enable both this:
|
133
128
|
# MyModule::MyClass
|
134
129
|
# and this:
|
@@ -179,6 +174,13 @@ Style/GuardClause:
|
|
179
174
|
Style/HashSyntax:
|
180
175
|
EnforcedStyle: ruby19_no_mixed_keys
|
181
176
|
|
177
|
+
# We are still unofficially targeting Ruby 2.3
|
178
|
+
Style/HashTransformKeys:
|
179
|
+
Enabled: false
|
180
|
+
|
181
|
+
Style/HashTransformValues:
|
182
|
+
Enabled: false
|
183
|
+
|
182
184
|
Style/IfUnlessModifier:
|
183
185
|
Enabled: false
|
184
186
|
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.5.
|
1
|
+
2.5.3
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## UNRELEASED
|
9
9
|
|
10
|
+
### Features :star:
|
11
|
+
- Added the DB Poller feature / process.
|
12
|
+
|
10
13
|
## 1.6.4 - 2020-05-11
|
11
14
|
- Fixed the payload logging fix for errored messages as well.
|
12
15
|
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
deimos-ruby (1.
|
4
|
+
deimos-ruby (1.7.0.pre.beta1)
|
5
5
|
avro_turf (~> 0.11)
|
6
6
|
phobos (~> 1.9)
|
7
7
|
ruby-kafka (~> 0.7)
|
@@ -41,7 +41,7 @@ GEM
|
|
41
41
|
activemodel (= 5.2.4.2)
|
42
42
|
activesupport (= 5.2.4.2)
|
43
43
|
arel (>= 9.0)
|
44
|
-
activerecord-import (1.0.
|
44
|
+
activerecord-import (1.0.4)
|
45
45
|
activerecord (>= 3.2)
|
46
46
|
activestorage (5.2.4.2)
|
47
47
|
actionpack (= 5.2.4.2)
|
@@ -54,30 +54,30 @@ GEM
|
|
54
54
|
tzinfo (~> 1.1)
|
55
55
|
arel (9.0.0)
|
56
56
|
ast (2.4.0)
|
57
|
-
avro (1.9.
|
57
|
+
avro (1.9.2)
|
58
58
|
multi_json
|
59
59
|
avro_turf (0.11.0)
|
60
60
|
avro (>= 1.7.7, < 1.10)
|
61
61
|
excon (~> 0.45)
|
62
62
|
builder (3.2.4)
|
63
63
|
coderay (1.1.2)
|
64
|
-
concurrent-ruby (1.1.
|
65
|
-
concurrent-ruby-ext (1.1.
|
66
|
-
concurrent-ruby (= 1.1.
|
64
|
+
concurrent-ruby (1.1.6)
|
65
|
+
concurrent-ruby-ext (1.1.6)
|
66
|
+
concurrent-ruby (= 1.1.6)
|
67
67
|
crass (1.0.6)
|
68
|
-
ddtrace (0.
|
68
|
+
ddtrace (0.35.1)
|
69
69
|
msgpack
|
70
70
|
diff-lcs (1.3)
|
71
71
|
digest-crc (0.5.1)
|
72
|
-
dogstatsd-ruby (4.
|
72
|
+
dogstatsd-ruby (4.8.0)
|
73
73
|
erubi (1.9.0)
|
74
74
|
excon (0.73.0)
|
75
75
|
exponential-backoff (0.0.4)
|
76
|
-
ffi (1.
|
76
|
+
ffi (1.12.2)
|
77
77
|
formatador (0.2.5)
|
78
78
|
globalid (0.4.2)
|
79
79
|
activesupport (>= 4.2.0)
|
80
|
-
guard (2.16.
|
80
|
+
guard (2.16.2)
|
81
81
|
formatador (>= 0.2.4)
|
82
82
|
listen (>= 2.7, < 4.0)
|
83
83
|
lumberjack (>= 1.0.12, < 2.0)
|
@@ -107,17 +107,17 @@ GEM
|
|
107
107
|
loofah (2.5.0)
|
108
108
|
crass (~> 1.0.2)
|
109
109
|
nokogiri (>= 1.5.9)
|
110
|
-
lumberjack (1.
|
110
|
+
lumberjack (1.2.4)
|
111
111
|
mail (2.7.1)
|
112
112
|
mini_mime (>= 0.1.1)
|
113
113
|
marcel (0.3.3)
|
114
114
|
mimemagic (~> 0.3.2)
|
115
|
-
method_source (0.
|
116
|
-
mimemagic (0.3.
|
115
|
+
method_source (1.0.0)
|
116
|
+
mimemagic (0.3.5)
|
117
117
|
mini_mime (1.0.2)
|
118
118
|
mini_portile2 (2.4.0)
|
119
119
|
minitest (5.14.0)
|
120
|
-
msgpack (1.3.
|
120
|
+
msgpack (1.3.3)
|
121
121
|
multi_json (1.14.1)
|
122
122
|
mysql2 (0.5.3)
|
123
123
|
nenv (0.3.0)
|
@@ -128,9 +128,9 @@ GEM
|
|
128
128
|
nenv (~> 0.1)
|
129
129
|
shellany (~> 0.0)
|
130
130
|
parallel (1.19.1)
|
131
|
-
parser (2.
|
131
|
+
parser (2.7.1.2)
|
132
132
|
ast (~> 2.4.0)
|
133
|
-
pg (1.
|
133
|
+
pg (1.2.3)
|
134
134
|
phobos (1.9.0)
|
135
135
|
activesupport (>= 3.0.0)
|
136
136
|
concurrent-ruby (>= 1.0.2)
|
@@ -139,9 +139,9 @@ GEM
|
|
139
139
|
logging
|
140
140
|
ruby-kafka
|
141
141
|
thor
|
142
|
-
pry (0.
|
143
|
-
coderay (~> 1.1
|
144
|
-
method_source (~>
|
142
|
+
pry (0.13.1)
|
143
|
+
coderay (~> 1.1)
|
144
|
+
method_source (~> 1.0)
|
145
145
|
rack (2.2.2)
|
146
146
|
rack-test (1.1.0)
|
147
147
|
rack (>= 1.0, < 3)
|
@@ -171,32 +171,34 @@ GEM
|
|
171
171
|
thor (>= 0.19.0, < 2.0)
|
172
172
|
rainbow (3.0.0)
|
173
173
|
rake (13.0.1)
|
174
|
-
rb-fsevent (0.10.
|
175
|
-
rb-inotify (0.10.
|
174
|
+
rb-fsevent (0.10.4)
|
175
|
+
rb-inotify (0.10.1)
|
176
176
|
ffi (~> 1.0)
|
177
|
+
rexml (3.2.4)
|
177
178
|
rspec (3.9.0)
|
178
179
|
rspec-core (~> 3.9.0)
|
179
180
|
rspec-expectations (~> 3.9.0)
|
180
181
|
rspec-mocks (~> 3.9.0)
|
181
|
-
rspec-core (3.9.
|
182
|
-
rspec-support (~> 3.9.
|
183
|
-
rspec-expectations (3.9.
|
182
|
+
rspec-core (3.9.2)
|
183
|
+
rspec-support (~> 3.9.3)
|
184
|
+
rspec-expectations (3.9.1)
|
184
185
|
diff-lcs (>= 1.2.0, < 2.0)
|
185
186
|
rspec-support (~> 3.9.0)
|
186
|
-
rspec-mocks (3.9.
|
187
|
+
rspec-mocks (3.9.1)
|
187
188
|
diff-lcs (>= 1.2.0, < 2.0)
|
188
189
|
rspec-support (~> 3.9.0)
|
189
|
-
rspec-support (3.9.
|
190
|
+
rspec-support (3.9.3)
|
190
191
|
rspec_junit_formatter (0.4.1)
|
191
192
|
rspec-core (>= 2, < 4, != 2.12.0)
|
192
|
-
rubocop (0.
|
193
|
+
rubocop (0.82.0)
|
193
194
|
jaro_winkler (~> 1.5.1)
|
194
195
|
parallel (~> 1.10)
|
195
|
-
parser (>= 2.
|
196
|
+
parser (>= 2.7.0.1)
|
196
197
|
rainbow (>= 2.2.2, < 4.0)
|
198
|
+
rexml
|
197
199
|
ruby-progressbar (~> 1.7)
|
198
|
-
unicode-display_width (>= 1.4.0, <
|
199
|
-
rubocop-rspec (1.
|
200
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
201
|
+
rubocop-rspec (1.39.0)
|
200
202
|
rubocop (>= 0.68.1)
|
201
203
|
ruby-kafka (0.7.10)
|
202
204
|
digest-crc
|
@@ -209,12 +211,12 @@ GEM
|
|
209
211
|
actionpack (>= 4.0)
|
210
212
|
activesupport (>= 4.0)
|
211
213
|
sprockets (>= 3.0.0)
|
212
|
-
sqlite3 (1.4.
|
214
|
+
sqlite3 (1.4.2)
|
213
215
|
thor (1.0.1)
|
214
216
|
thread_safe (0.3.6)
|
215
217
|
tzinfo (1.2.7)
|
216
218
|
thread_safe (~> 0.1)
|
217
|
-
unicode-display_width (1.
|
219
|
+
unicode-display_width (1.7.0)
|
218
220
|
websocket-driver (0.7.1)
|
219
221
|
websocket-extensions (>= 0.1.0)
|
220
222
|
websocket-extensions (0.1.4)
|
@@ -226,7 +228,6 @@ DEPENDENCIES
|
|
226
228
|
activerecord (~> 5.2)
|
227
229
|
activerecord-import
|
228
230
|
avro (~> 1.9)
|
229
|
-
bundler (~> 1)
|
230
231
|
ddtrace (~> 0.11)
|
231
232
|
deimos-ruby!
|
232
233
|
dogstatsd-ruby (~> 4.2)
|
@@ -244,4 +245,4 @@ DEPENDENCIES
|
|
244
245
|
sqlite3 (~> 1.3)
|
245
246
|
|
246
247
|
BUNDLED WITH
|
247
|
-
1.
|
248
|
+
2.1.4
|
data/README.md
CHANGED
@@ -23,6 +23,7 @@ Built on Phobos and hence Ruby-Kafka.
|
|
23
23
|
* [Consumers](#consumers)
|
24
24
|
* [Rails Integration](#rails-integration)
|
25
25
|
* [Database Backend](#database-backend)
|
26
|
+
* [Database Poller](#database-poller)
|
26
27
|
* [Running Consumers](#running-consumers)
|
27
28
|
* [Metrics](#metrics)
|
28
29
|
* [Testing](#testing)
|
@@ -557,6 +558,75 @@ class MyConsumer < Deimos::ActiveRecordConsumer
|
|
557
558
|
end
|
558
559
|
```
|
559
560
|
|
561
|
+
## Database Poller
|
562
|
+
|
563
|
+
Another method of fetching updates from the database to Kafka is by polling
|
564
|
+
the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)).
|
565
|
+
Deimos provides a database poller, which lets you use the same pattern but
|
566
|
+
with all the flexibility of real Ruby code, and the added advantage of having
|
567
|
+
a single consistent framework to talk to Kafka.
|
568
|
+
|
569
|
+
One of the disadvantages of polling the database is that it can't detect deletions.
|
570
|
+
You can work around this by configuring a mixin to send messages *only* on deletion,
|
571
|
+
and use the poller to handle all other updates. You can reuse the same producer
|
572
|
+
for both cases to handle joins, changes/mappings, business logic, etc.
|
573
|
+
|
574
|
+
To enable the poller, generate the migration:
|
575
|
+
|
576
|
+
```bash
|
577
|
+
rails g deimos:db_poller
|
578
|
+
```
|
579
|
+
|
580
|
+
Run the migration:
|
581
|
+
|
582
|
+
```bash
|
583
|
+
rails db:migrate
|
584
|
+
```
|
585
|
+
|
586
|
+
Add the following configuration:
|
587
|
+
|
588
|
+
```ruby
|
589
|
+
Deimos.configure do
|
590
|
+
db_poller do
|
591
|
+
producer_class 'MyProducer' # an ActiveRecordProducer
|
592
|
+
end
|
593
|
+
db_poller do
|
594
|
+
producer_class 'MyOtherProducer'
|
595
|
+
run_every 2.minutes
|
596
|
+
delay_time 5.seconds # to allow for transactions to finish
|
597
|
+
full_table true # if set, dump the entire table every run; use for small tables
|
598
|
+
end
|
599
|
+
end
|
600
|
+
```
|
601
|
+
|
602
|
+
All the information around connecting and querying the database lives in the
|
603
|
+
producer itself, so you don't need to write any additional code. You can
|
604
|
+
define one additional method on the producer:
|
605
|
+
|
606
|
+
```ruby
|
607
|
+
class MyProducer < Deimos::ActiveRecordProducer
|
608
|
+
...
|
609
|
+
def poll_query(time_from:, time_to:, column_name:, min_id:)
|
610
|
+
# Default is to use the timestamp `column_name` to find all records
|
611
|
+
# between time_from and time_to, or records where `column_name` is equal to
|
612
|
+
# `time_from` but its ID is greater than `min_id`. This is called
|
613
|
+
# successively as the DB is polled to ensure even if a batch ends in the
|
614
|
+
# middle of a timestamp, we won't miss any records.
|
615
|
+
# You can override or change this behavior if necessary.
|
616
|
+
end
|
617
|
+
end
|
618
|
+
```
|
619
|
+
|
620
|
+
To run the DB poller:
|
621
|
+
|
622
|
+
rake deimos:db_poller
|
623
|
+
|
624
|
+
Note that the DB poller creates one thread per configured poller, and is
|
625
|
+
currently designed *not* to be scaled out - i.e. it assumes you will only
|
626
|
+
have one process running at a time. If a particular poll takes longer than
|
627
|
+
the poll interval (e.g. the interval is set to 1 minute but the poll takes 75 seconds),
|
628
|
+
the next poll will begin immediately following the first one completing.
|
629
|
+
|
560
630
|
## Running consumers
|
561
631
|
|
562
632
|
Deimos includes a rake task. Once it's in your gemfile, just run
|
data/Rakefile
CHANGED
data/deimos-ruby.gemspec
CHANGED
@@ -25,7 +25,6 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_development_dependency('activerecord', '~> 5.2')
|
26
26
|
spec.add_development_dependency('activerecord-import')
|
27
27
|
spec.add_development_dependency('avro', '~> 1.9')
|
28
|
-
spec.add_development_dependency('bundler', '~> 1')
|
29
28
|
spec.add_development_dependency('ddtrace', '~> 0.11')
|
30
29
|
spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
|
31
30
|
spec.add_development_dependency('guard', '~> 2')
|
data/docs/CONFIGURATION.md
CHANGED
@@ -89,6 +89,29 @@ offset_commit_threshold|0|Number of messages that can be processed before their
|
|
89
89
|
heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
|
90
90
|
backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
|
91
91
|
|
92
|
+
## Defining Database Pollers
|
93
|
+
|
94
|
+
These are used when polling the database via `rake deimos:db_poller`. You
|
95
|
+
can create a number of pollers, one per topic.
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
Deimos.configure do
|
99
|
+
db_poller do
|
100
|
+
producer_class 'MyProducer'
|
101
|
+
run_every 2.minutes
|
102
|
+
end
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
Config name|Default|Description
|
107
|
+
-----------|-------|-----------
|
108
|
+
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
109
|
+
run_every|60|Amount of time in seconds to wait between runs.
|
110
|
+
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
111
|
+
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
112
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
113
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
114
|
+
|
92
115
|
## Kafka Configuration
|
93
116
|
|
94
117
|
Config name|Default|Description
|
@@ -59,6 +59,29 @@ module Deimos
|
|
59
59
|
k.to_sym != :payload_key && !fields.map(&:name).include?(k)
|
60
60
|
end
|
61
61
|
end
|
62
|
+
|
63
|
+
# Query to use when polling the database with the DbPoller. Add
|
64
|
+
# includes, joins, or wheres as necessary, or replace entirely.
|
65
|
+
# @param time_from [Time] the time to start the query from.
|
66
|
+
# @param time_to [Time] the time to end the query.
|
67
|
+
# @param column_name [Symbol] the column name to look for.
|
68
|
+
# @param min_id [Numeric] the minimum ID (i.e. all IDs must be greater
|
69
|
+
# than this value).
|
70
|
+
# @return [ActiveRecord::Relation]
|
71
|
+
def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
|
72
|
+
klass = config[:record_class]
|
73
|
+
table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
|
74
|
+
column = ActiveRecord::Base.connection.quote_column_name(column_name)
|
75
|
+
primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
|
76
|
+
klass.where(
|
77
|
+
"((#{table}.#{column} = ? AND #{table}.#{primary} > ?) \
|
78
|
+
OR #{table}.#{column} > ?) AND #{table}.#{column} <= ?",
|
79
|
+
time_from,
|
80
|
+
min_id,
|
81
|
+
time_from,
|
82
|
+
time_to
|
83
|
+
)
|
84
|
+
end
|
62
85
|
end
|
63
86
|
end
|
64
87
|
end
|
@@ -340,6 +340,26 @@ module Deimos
|
|
340
340
|
setting :heartbeat_interval
|
341
341
|
end
|
342
342
|
|
343
|
+
setting_object :db_poller do
|
344
|
+
# Producer class to use for the poller.
|
345
|
+
setting :producer_class
|
346
|
+
# How often to run the poller, in seconds. If the poll takes longer than this
|
347
|
+
# time, it will run again immediately and the timeout
|
348
|
+
# will be pushed to the next e.g. 1 minute.
|
349
|
+
setting :run_every, 60
|
350
|
+
# Column to use to find updates. Must have an index on it.
|
351
|
+
setting :timestamp_column, :updated_at
|
352
|
+
# Amount of time, in seconds, to wait before catching updates, to allow transactions
|
353
|
+
# to complete but still pick up the right records.
|
354
|
+
setting :delay_time, 2
|
355
|
+
# If true, dump the full table rather than incremental changes. Should
|
356
|
+
# only be used for very small tables.
|
357
|
+
setting :full_table, false
|
358
|
+
# If false, start from the current time instead of the beginning of time
|
359
|
+
# if this is the first time running the poller.
|
360
|
+
setting :start_from_beginning, true
|
361
|
+
end
|
362
|
+
|
343
363
|
deprecate 'kafka_logger', 'kafka.logger'
|
344
364
|
deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
|
345
365
|
deprecate 'schema_registry_url', 'schema.registry_url'
|
@@ -0,0 +1,149 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/poll_info'
|
4
|
+
require 'deimos/utils/executor'
|
5
|
+
require 'deimos/utils/signal_handler'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module Utils
|
9
|
+
# Class which continually polls the database and sends Kafka messages.
|
10
|
+
class DbPoller
|
11
|
+
BATCH_SIZE = 1000
|
12
|
+
|
13
|
+
# Needed for Executor so it can identify the worker
|
14
|
+
attr_reader :id
|
15
|
+
|
16
|
+
# Begin the DB Poller process.
|
17
|
+
def self.start!
|
18
|
+
if Deimos.config.db_poller_objects.empty?
|
19
|
+
raise('No pollers configured!')
|
20
|
+
end
|
21
|
+
|
22
|
+
pollers = Deimos.config.db_poller_objects.map do |poller_config|
|
23
|
+
self.new(poller_config)
|
24
|
+
end
|
25
|
+
executor = Deimos::Utils::Executor.new(pollers,
|
26
|
+
sleep_seconds: 5,
|
27
|
+
logger: Deimos.config.logger)
|
28
|
+
signal_handler = Deimos::Utils::SignalHandler.new(executor)
|
29
|
+
signal_handler.run!
|
30
|
+
end
|
31
|
+
|
32
|
+
# @param config [Deimos::Configuration::ConfigStruct]
|
33
|
+
def initialize(config)
|
34
|
+
@config = config
|
35
|
+
@id = SecureRandom.hex
|
36
|
+
begin
|
37
|
+
@producer = @config.producer_class.constantize
|
38
|
+
rescue NameError
|
39
|
+
raise "Class #{@config.producer_class} not found!"
|
40
|
+
end
|
41
|
+
unless @producer < Deimos::ActiveRecordProducer
|
42
|
+
raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Start the poll:
|
47
|
+
# 1) Grab the current PollInfo from the database indicating the last
|
48
|
+
# time we ran
|
49
|
+
# 2) On a loop, process all the recent updates between the last time
|
50
|
+
# we ran and now.
|
51
|
+
def start
|
52
|
+
# Don't send asynchronously
|
53
|
+
if Deimos.config.producers.backend == :kafka_async
|
54
|
+
Deimos.config.producers.backend = :kafka
|
55
|
+
end
|
56
|
+
Deimos.config.logger.info('Starting...')
|
57
|
+
@signal_to_stop = false
|
58
|
+
retrieve_poll_info
|
59
|
+
loop do
|
60
|
+
if @signal_to_stop
|
61
|
+
Deimos.config.logger.info('Shutting down')
|
62
|
+
break
|
63
|
+
end
|
64
|
+
process_updates
|
65
|
+
sleep 0.1
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Grab the PollInfo or create if it doesn't exist.
|
70
|
+
def retrieve_poll_info
|
71
|
+
ActiveRecord::Base.connection.reconnect!
|
72
|
+
new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
|
73
|
+
@info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
|
74
|
+
Deimos::PollInfo.create!(producer: @config.producer_class,
|
75
|
+
last_sent: new_time,
|
76
|
+
last_sent_id: 0)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Stop the poll.
|
80
|
+
def stop
|
81
|
+
Deimos.config.logger.info('Received signal to stop')
|
82
|
+
@signal_to_stop = true
|
83
|
+
end
|
84
|
+
|
85
|
+
# Indicate whether this current loop should process updates. Most loops
|
86
|
+
# will busy-wait (sleeping 0.1 seconds) until it's ready.
|
87
|
+
# @return [Boolean]
|
88
|
+
def should_run?
|
89
|
+
Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param record [ActiveRecord::Base]
|
93
|
+
# @return [ActiveSupport::TimeWithZone]
|
94
|
+
def last_updated(record)
|
95
|
+
record.public_send(@config.timestamp_column)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Send messages for updated data.
|
99
|
+
def process_updates
|
100
|
+
return unless should_run?
|
101
|
+
|
102
|
+
time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
|
103
|
+
time_to = Time.zone.now - @config.delay_time
|
104
|
+
Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
|
105
|
+
message_count = 0
|
106
|
+
batch_count = 0
|
107
|
+
|
108
|
+
# poll_query gets all the relevant data from the database, as defined
|
109
|
+
# by the producer itself.
|
110
|
+
loop do
|
111
|
+
Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
|
112
|
+
batch = fetch_results(time_from, time_to).to_a
|
113
|
+
break if batch.empty?
|
114
|
+
|
115
|
+
batch_count += 1
|
116
|
+
process_batch(batch)
|
117
|
+
message_count += batch.size
|
118
|
+
time_from = last_updated(batch.last)
|
119
|
+
end
|
120
|
+
Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches}")
|
121
|
+
end
|
122
|
+
|
123
|
+
# @param time_from [ActiveSupport::TimeWithZone]
|
124
|
+
# @param time_to [ActiveSupport::TimeWithZone]
|
125
|
+
# @return [ActiveRecord::Relation]
|
126
|
+
def fetch_results(time_from, time_to)
|
127
|
+
id = @producer.config[:record_class].primary_key
|
128
|
+
quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
|
129
|
+
quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
|
130
|
+
@producer.poll_query(time_from: time_from,
|
131
|
+
time_to: time_to,
|
132
|
+
column_name: @config.timestamp_column,
|
133
|
+
min_id: @info.last_sent_id).
|
134
|
+
limit(BATCH_SIZE).
|
135
|
+
order("#{quoted_timestamp}, #{quoted_id}")
|
136
|
+
end
|
137
|
+
|
138
|
+
# @param batch [Array<ActiveRecord::Base>]
|
139
|
+
def process_batch(batch)
|
140
|
+
record = batch.last
|
141
|
+
id_method = record.class.primary_key
|
142
|
+
last_id = record.public_send(id_method)
|
143
|
+
last_updated_at = last_updated(record)
|
144
|
+
@producer.send_events(batch)
|
145
|
+
@info.update_attributes!(last_sent: last_updated_at, last_sent_id: last_id)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|