deimos-ruby 1.6.4 → 1.7.0.pre.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +9 -0
- data/.rubocop.yml +15 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +35 -34
- data/README.md +70 -0
- data/Rakefile +1 -1
- data/deimos-ruby.gemspec +0 -1
- data/docs/CONFIGURATION.md +23 -0
- data/lib/deimos/active_record_producer.rb +23 -0
- data/lib/deimos/config/configuration.rb +20 -0
- data/lib/deimos/kafka_topic_info.rb +1 -1
- data/lib/deimos/metrics/provider.rb +0 -2
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/tracing/provider.rb +0 -2
- data/lib/deimos/utils/db_poller.rb +149 -0
- data/lib/deimos/utils/db_producer.rb +2 -1
- data/lib/deimos/utils/executor.rb +1 -1
- data/lib/deimos/version.rb +1 -1
- data/lib/deimos.rb +1 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +7 -0
- data/spec/active_record_producer_spec.rb +66 -88
- data/spec/consumer_spec.rb +2 -2
- data/spec/producer_spec.rb +3 -3
- data/spec/rake_spec.rb +1 -1
- data/spec/spec_helper.rb +44 -6
- data/spec/utils/db_poller_spec.rb +320 -0
- metadata +13 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab8ac284db2c98dac5624caf5bf75118ad89fb9ec6e1f3109f15373f2bf4c8be
|
4
|
+
data.tar.gz: 7d26a7d8d163ab4783638c9393bea2d1a7c8f364a7eed2ea4cb699cbcbafd244
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2610223a8d8c2546dad4037e4d2b1845e77372b304f4c072c269316bceed4a6626bd36d3541561d91e1c68a06ed891c051feb16a1800763cf805061c05cadc58
|
7
|
+
data.tar.gz: 3916fa546b45182b987b0409d51bcce96c5a1beee449722861819f76377258e7ccb5700f50d2c0d8d1791986ed4f5d19b7c989cdcdbe77d1289e8531eb24ba15
|
data/.circleci/config.yml
CHANGED
@@ -20,6 +20,9 @@ jobs:
|
|
20
20
|
# Bundle install dependencies in /tmp/
|
21
21
|
# so Dockerfile does not copy them since
|
22
22
|
# its base image is different than CircleCI
|
23
|
+
- run:
|
24
|
+
name: Install bundler
|
25
|
+
command: gem install bundler:2.1.4
|
23
26
|
- run:
|
24
27
|
name: Bundle install
|
25
28
|
command: bundle install --path vendor/bundle --jobs=4 --retry=3
|
@@ -40,6 +43,9 @@ jobs:
|
|
40
43
|
steps:
|
41
44
|
- attach_workspace:
|
42
45
|
at: ~/workspace
|
46
|
+
- run:
|
47
|
+
name: Install bundler
|
48
|
+
command: gem install bundler:2.1.4
|
43
49
|
- run:
|
44
50
|
name: Point bundle to vendor/bundle
|
45
51
|
command: bundle --path vendor/bundle
|
@@ -50,6 +56,9 @@ jobs:
|
|
50
56
|
steps:
|
51
57
|
- attach_workspace:
|
52
58
|
at: ~/workspace
|
59
|
+
- run:
|
60
|
+
name: Install bundler
|
61
|
+
command: gem install bundler:2.1.4
|
53
62
|
- run:
|
54
63
|
name: Point bundle to vendor/bundle
|
55
64
|
command: bundle --path vendor/bundle
|
data/.rubocop.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require: rubocop-rspec
|
2
2
|
|
3
3
|
AllCops:
|
4
|
-
TargetRubyVersion: 2.
|
4
|
+
TargetRubyVersion: 2.4
|
5
5
|
Exclude:
|
6
6
|
- lib/deimos/monkey_patches/*.rb
|
7
7
|
- vendor/**/*
|
8
|
+
NewCops: enable
|
8
9
|
|
9
10
|
# class Plumbus
|
10
11
|
# private
|
@@ -34,6 +35,12 @@ Layout/DotPosition:
|
|
34
35
|
Layout/EmptyLinesAroundBlockBody:
|
35
36
|
Enabled: false
|
36
37
|
|
38
|
+
Layout/LineLength:
|
39
|
+
Max: 100
|
40
|
+
Severity: refactor
|
41
|
+
Exclude:
|
42
|
+
- 'spec/**/*'
|
43
|
+
|
37
44
|
# foo = if expression
|
38
45
|
# 'bar'
|
39
46
|
# end
|
@@ -82,12 +89,6 @@ Metrics/CyclomaticComplexity:
|
|
82
89
|
Severity: refactor
|
83
90
|
Max: 20
|
84
91
|
|
85
|
-
Metrics/LineLength:
|
86
|
-
Max: 100
|
87
|
-
Severity: refactor
|
88
|
-
Exclude:
|
89
|
-
- 'spec/**/*'
|
90
|
-
|
91
92
|
Metrics/MethodLength:
|
92
93
|
Severity: refactor
|
93
94
|
Max: 30
|
@@ -123,12 +124,6 @@ Style/BlockDelimiters:
|
|
123
124
|
# some_method(x, y, {a: 1, b: 2})
|
124
125
|
# some_method(x, y, {a: 1, b: 2}, a: 1, b: 2)
|
125
126
|
|
126
|
-
# good
|
127
|
-
# some_method(x, y, a: 1, b: 2)
|
128
|
-
# some_method(x, y, {a: 1, b: 2}, {a: 1, b: 2})
|
129
|
-
Style/BracesAroundHashParameters:
|
130
|
-
EnforcedStyle: context_dependent
|
131
|
-
|
132
127
|
# Enable both this:
|
133
128
|
# MyModule::MyClass
|
134
129
|
# and this:
|
@@ -179,6 +174,13 @@ Style/GuardClause:
|
|
179
174
|
Style/HashSyntax:
|
180
175
|
EnforcedStyle: ruby19_no_mixed_keys
|
181
176
|
|
177
|
+
# We are still unofficially targeting Ruby 2.3
|
178
|
+
Style/HashTransformKeys:
|
179
|
+
Enabled: false
|
180
|
+
|
181
|
+
Style/HashTransformValues:
|
182
|
+
Enabled: false
|
183
|
+
|
182
184
|
Style/IfUnlessModifier:
|
183
185
|
Enabled: false
|
184
186
|
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.5.
|
1
|
+
2.5.3
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## UNRELEASED
|
9
9
|
|
10
|
+
### Features :star:
|
11
|
+
- Added the DB Poller feature / process.
|
12
|
+
|
10
13
|
## 1.6.4 - 2020-05-11
|
11
14
|
- Fixed the payload logging fix for errored messages as well.
|
12
15
|
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
deimos-ruby (1.
|
4
|
+
deimos-ruby (1.7.0.pre.beta1)
|
5
5
|
avro_turf (~> 0.11)
|
6
6
|
phobos (~> 1.9)
|
7
7
|
ruby-kafka (~> 0.7)
|
@@ -41,7 +41,7 @@ GEM
|
|
41
41
|
activemodel (= 5.2.4.2)
|
42
42
|
activesupport (= 5.2.4.2)
|
43
43
|
arel (>= 9.0)
|
44
|
-
activerecord-import (1.0.
|
44
|
+
activerecord-import (1.0.4)
|
45
45
|
activerecord (>= 3.2)
|
46
46
|
activestorage (5.2.4.2)
|
47
47
|
actionpack (= 5.2.4.2)
|
@@ -54,30 +54,30 @@ GEM
|
|
54
54
|
tzinfo (~> 1.1)
|
55
55
|
arel (9.0.0)
|
56
56
|
ast (2.4.0)
|
57
|
-
avro (1.9.
|
57
|
+
avro (1.9.2)
|
58
58
|
multi_json
|
59
59
|
avro_turf (0.11.0)
|
60
60
|
avro (>= 1.7.7, < 1.10)
|
61
61
|
excon (~> 0.45)
|
62
62
|
builder (3.2.4)
|
63
63
|
coderay (1.1.2)
|
64
|
-
concurrent-ruby (1.1.
|
65
|
-
concurrent-ruby-ext (1.1.
|
66
|
-
concurrent-ruby (= 1.1.
|
64
|
+
concurrent-ruby (1.1.6)
|
65
|
+
concurrent-ruby-ext (1.1.6)
|
66
|
+
concurrent-ruby (= 1.1.6)
|
67
67
|
crass (1.0.6)
|
68
|
-
ddtrace (0.
|
68
|
+
ddtrace (0.35.1)
|
69
69
|
msgpack
|
70
70
|
diff-lcs (1.3)
|
71
71
|
digest-crc (0.5.1)
|
72
|
-
dogstatsd-ruby (4.
|
72
|
+
dogstatsd-ruby (4.8.0)
|
73
73
|
erubi (1.9.0)
|
74
74
|
excon (0.73.0)
|
75
75
|
exponential-backoff (0.0.4)
|
76
|
-
ffi (1.
|
76
|
+
ffi (1.12.2)
|
77
77
|
formatador (0.2.5)
|
78
78
|
globalid (0.4.2)
|
79
79
|
activesupport (>= 4.2.0)
|
80
|
-
guard (2.16.
|
80
|
+
guard (2.16.2)
|
81
81
|
formatador (>= 0.2.4)
|
82
82
|
listen (>= 2.7, < 4.0)
|
83
83
|
lumberjack (>= 1.0.12, < 2.0)
|
@@ -107,17 +107,17 @@ GEM
|
|
107
107
|
loofah (2.5.0)
|
108
108
|
crass (~> 1.0.2)
|
109
109
|
nokogiri (>= 1.5.9)
|
110
|
-
lumberjack (1.
|
110
|
+
lumberjack (1.2.4)
|
111
111
|
mail (2.7.1)
|
112
112
|
mini_mime (>= 0.1.1)
|
113
113
|
marcel (0.3.3)
|
114
114
|
mimemagic (~> 0.3.2)
|
115
|
-
method_source (0.
|
116
|
-
mimemagic (0.3.
|
115
|
+
method_source (1.0.0)
|
116
|
+
mimemagic (0.3.5)
|
117
117
|
mini_mime (1.0.2)
|
118
118
|
mini_portile2 (2.4.0)
|
119
119
|
minitest (5.14.0)
|
120
|
-
msgpack (1.3.
|
120
|
+
msgpack (1.3.3)
|
121
121
|
multi_json (1.14.1)
|
122
122
|
mysql2 (0.5.3)
|
123
123
|
nenv (0.3.0)
|
@@ -128,9 +128,9 @@ GEM
|
|
128
128
|
nenv (~> 0.1)
|
129
129
|
shellany (~> 0.0)
|
130
130
|
parallel (1.19.1)
|
131
|
-
parser (2.
|
131
|
+
parser (2.7.1.2)
|
132
132
|
ast (~> 2.4.0)
|
133
|
-
pg (1.
|
133
|
+
pg (1.2.3)
|
134
134
|
phobos (1.9.0)
|
135
135
|
activesupport (>= 3.0.0)
|
136
136
|
concurrent-ruby (>= 1.0.2)
|
@@ -139,9 +139,9 @@ GEM
|
|
139
139
|
logging
|
140
140
|
ruby-kafka
|
141
141
|
thor
|
142
|
-
pry (0.
|
143
|
-
coderay (~> 1.1
|
144
|
-
method_source (~>
|
142
|
+
pry (0.13.1)
|
143
|
+
coderay (~> 1.1)
|
144
|
+
method_source (~> 1.0)
|
145
145
|
rack (2.2.2)
|
146
146
|
rack-test (1.1.0)
|
147
147
|
rack (>= 1.0, < 3)
|
@@ -171,32 +171,34 @@ GEM
|
|
171
171
|
thor (>= 0.19.0, < 2.0)
|
172
172
|
rainbow (3.0.0)
|
173
173
|
rake (13.0.1)
|
174
|
-
rb-fsevent (0.10.
|
175
|
-
rb-inotify (0.10.
|
174
|
+
rb-fsevent (0.10.4)
|
175
|
+
rb-inotify (0.10.1)
|
176
176
|
ffi (~> 1.0)
|
177
|
+
rexml (3.2.4)
|
177
178
|
rspec (3.9.0)
|
178
179
|
rspec-core (~> 3.9.0)
|
179
180
|
rspec-expectations (~> 3.9.0)
|
180
181
|
rspec-mocks (~> 3.9.0)
|
181
|
-
rspec-core (3.9.
|
182
|
-
rspec-support (~> 3.9.
|
183
|
-
rspec-expectations (3.9.
|
182
|
+
rspec-core (3.9.2)
|
183
|
+
rspec-support (~> 3.9.3)
|
184
|
+
rspec-expectations (3.9.1)
|
184
185
|
diff-lcs (>= 1.2.0, < 2.0)
|
185
186
|
rspec-support (~> 3.9.0)
|
186
|
-
rspec-mocks (3.9.
|
187
|
+
rspec-mocks (3.9.1)
|
187
188
|
diff-lcs (>= 1.2.0, < 2.0)
|
188
189
|
rspec-support (~> 3.9.0)
|
189
|
-
rspec-support (3.9.
|
190
|
+
rspec-support (3.9.3)
|
190
191
|
rspec_junit_formatter (0.4.1)
|
191
192
|
rspec-core (>= 2, < 4, != 2.12.0)
|
192
|
-
rubocop (0.
|
193
|
+
rubocop (0.82.0)
|
193
194
|
jaro_winkler (~> 1.5.1)
|
194
195
|
parallel (~> 1.10)
|
195
|
-
parser (>= 2.
|
196
|
+
parser (>= 2.7.0.1)
|
196
197
|
rainbow (>= 2.2.2, < 4.0)
|
198
|
+
rexml
|
197
199
|
ruby-progressbar (~> 1.7)
|
198
|
-
unicode-display_width (>= 1.4.0, <
|
199
|
-
rubocop-rspec (1.
|
200
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
201
|
+
rubocop-rspec (1.39.0)
|
200
202
|
rubocop (>= 0.68.1)
|
201
203
|
ruby-kafka (0.7.10)
|
202
204
|
digest-crc
|
@@ -209,12 +211,12 @@ GEM
|
|
209
211
|
actionpack (>= 4.0)
|
210
212
|
activesupport (>= 4.0)
|
211
213
|
sprockets (>= 3.0.0)
|
212
|
-
sqlite3 (1.4.
|
214
|
+
sqlite3 (1.4.2)
|
213
215
|
thor (1.0.1)
|
214
216
|
thread_safe (0.3.6)
|
215
217
|
tzinfo (1.2.7)
|
216
218
|
thread_safe (~> 0.1)
|
217
|
-
unicode-display_width (1.
|
219
|
+
unicode-display_width (1.7.0)
|
218
220
|
websocket-driver (0.7.1)
|
219
221
|
websocket-extensions (>= 0.1.0)
|
220
222
|
websocket-extensions (0.1.4)
|
@@ -226,7 +228,6 @@ DEPENDENCIES
|
|
226
228
|
activerecord (~> 5.2)
|
227
229
|
activerecord-import
|
228
230
|
avro (~> 1.9)
|
229
|
-
bundler (~> 1)
|
230
231
|
ddtrace (~> 0.11)
|
231
232
|
deimos-ruby!
|
232
233
|
dogstatsd-ruby (~> 4.2)
|
@@ -244,4 +245,4 @@ DEPENDENCIES
|
|
244
245
|
sqlite3 (~> 1.3)
|
245
246
|
|
246
247
|
BUNDLED WITH
|
247
|
-
1.
|
248
|
+
2.1.4
|
data/README.md
CHANGED
@@ -23,6 +23,7 @@ Built on Phobos and hence Ruby-Kafka.
|
|
23
23
|
* [Consumers](#consumers)
|
24
24
|
* [Rails Integration](#rails-integration)
|
25
25
|
* [Database Backend](#database-backend)
|
26
|
+
* [Database Poller](#database-poller)
|
26
27
|
* [Running Consumers](#running-consumers)
|
27
28
|
* [Metrics](#metrics)
|
28
29
|
* [Testing](#testing)
|
@@ -557,6 +558,75 @@ class MyConsumer < Deimos::ActiveRecordConsumer
|
|
557
558
|
end
|
558
559
|
```
|
559
560
|
|
561
|
+
## Database Poller
|
562
|
+
|
563
|
+
Another method of fetching updates from the database to Kafka is by polling
|
564
|
+
the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)).
|
565
|
+
Deimos provides a database poller, which allows you to use the same pattern but
|
566
|
+
with all the flexibility of real Ruby code, and the added advantage of having
|
567
|
+
a single consistent framework to talk to Kafka.
|
568
|
+
|
569
|
+
One of the disadvantages of polling the database is that it can't detect deletions.
|
570
|
+
You can get over this by configuring a mixin to send messages *only* on deletion,
|
571
|
+
and use the poller to handle all other updates. You can reuse the same producer
|
572
|
+
for both cases to handle joins, changes/mappings, business logic, etc.
|
573
|
+
|
574
|
+
To enable the poller, generate the migration:
|
575
|
+
|
576
|
+
```ruby
|
577
|
+
rails g deimos:db_poller
|
578
|
+
```
|
579
|
+
|
580
|
+
Run the migration:
|
581
|
+
|
582
|
+
```ruby
|
583
|
+
rails db:migrate
|
584
|
+
```
|
585
|
+
|
586
|
+
Add the following configuration:
|
587
|
+
|
588
|
+
```ruby
|
589
|
+
Deimos.configure do
|
590
|
+
db_poller do
|
591
|
+
producer_class 'MyProducer' # an ActiveRecordProducer
|
592
|
+
end
|
593
|
+
db_poller do
|
594
|
+
producer_class 'MyOtherProducer'
|
595
|
+
run_every 2.minutes
|
596
|
+
delay_time 5.seconds # to allow for transactions to finish
|
597
|
+
full_table true # if set, dump the entire table every run; use for small tables
|
598
|
+
end
|
599
|
+
end
|
600
|
+
```
|
601
|
+
|
602
|
+
All the information around connecting and querying the database lives in the
|
603
|
+
producer itself, so you don't need to write any additional code. You can
|
604
|
+
define one additional method on the producer:
|
605
|
+
|
606
|
+
```ruby
|
607
|
+
class MyProducer < Deimos::ActiveRecordProducer
|
608
|
+
...
|
609
|
+
def poll_query(time_from:, time_to:, column_name:, min_id:)
|
610
|
+
# Default is to use the timestamp `column_name` to find all records
|
611
|
+
# between time_from and time_to, or records where `updated_at` is equal to
|
612
|
+
# `time_from` but its ID is greater than `min_id`. This is called
|
613
|
+
# successively as the DB is polled to ensure even if a batch ends in the
|
614
|
+
# middle of a timestamp, we won't miss any records.
|
615
|
+
# You can override or change this behavior if necessary.
|
616
|
+
end
|
617
|
+
end
|
618
|
+
```
|
619
|
+
|
620
|
+
To run the DB poller:
|
621
|
+
|
622
|
+
rake deimos:db_poller
|
623
|
+
|
624
|
+
Note that the DB poller creates one thread per configured poller, and is
|
625
|
+
currently designed *not* to be scaled out - i.e. it assumes you will only
|
626
|
+
have one process running at a time. If a particular poll takes longer than
|
627
|
+
the poll interval (e.g. the interval is set at 1 minute but the poll takes 75 seconds)
|
628
|
+
the next poll will begin immediately following the first one completing.
|
629
|
+
|
560
630
|
## Running consumers
|
561
631
|
|
562
632
|
Deimos includes a rake task. Once it's in your gemfile, just run
|
data/Rakefile
CHANGED
data/deimos-ruby.gemspec
CHANGED
@@ -25,7 +25,6 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_development_dependency('activerecord', '~> 5.2')
|
26
26
|
spec.add_development_dependency('activerecord-import')
|
27
27
|
spec.add_development_dependency('avro', '~> 1.9')
|
28
|
-
spec.add_development_dependency('bundler', '~> 1')
|
29
28
|
spec.add_development_dependency('ddtrace', '~> 0.11')
|
30
29
|
spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
|
31
30
|
spec.add_development_dependency('guard', '~> 2')
|
data/docs/CONFIGURATION.md
CHANGED
@@ -89,6 +89,29 @@ offset_commit_threshold|0|Number of messages that can be processed before their
|
|
89
89
|
heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
|
90
90
|
backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
|
91
91
|
|
92
|
+
## Defining Database Pollers
|
93
|
+
|
94
|
+
These are used when polling the database via `rake deimos:db_poller`. You
|
95
|
+
can create a number of pollers, one per topic.
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
Deimos.configure do
|
99
|
+
db_poller do
|
100
|
+
producer_class 'MyProducer'
|
101
|
+
run_every 2.minutes
|
102
|
+
end
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
Config name|Default|Description
|
107
|
+
-----------|-------|-----------
|
108
|
+
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
109
|
+
run_every|60|Amount of time in seconds to wait between runs.
|
110
|
+
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
111
|
+
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
112
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
113
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
114
|
+
|
92
115
|
## Kafka Configuration
|
93
116
|
|
94
117
|
Config name|Default|Description
|
@@ -59,6 +59,29 @@ module Deimos
|
|
59
59
|
k.to_sym != :payload_key && !fields.map(&:name).include?(k)
|
60
60
|
end
|
61
61
|
end
|
62
|
+
|
63
|
+
# Query to use when polling the database with the DbPoller. Override in a
# producer to add includes, joins, or wheres as necessary, or to replace
# the query entirely.
#
# The default matches rows whose `column_name` value is later than
# `time_from` and no later than `time_to`, plus rows whose value is exactly
# `time_from` and whose primary key is greater than `min_id`.
# @param time_from [Time] the time to start the query from.
# @param time_to [Time] the time to end the query (inclusive).
# @param column_name [Symbol] the timestamp column to filter on.
# @param min_id [Numeric] the minimum ID (i.e. all IDs must be greater
# than this value) for rows stamped exactly `time_from`.
# @return [ActiveRecord::Relation]
def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
  record_class = config[:record_class]
  connection = ActiveRecord::Base.connection
  table = connection.quote_table_name(record_class.table_name)
  # Fully qualified, quoted "table.column" references used in the SQL below.
  timestamp = "#{table}.#{connection.quote_column_name(column_name)}"
  primary = "#{table}.#{connection.quote_column_name(record_class.primary_key)}"
  record_class.where(
    "((#{timestamp} = ? AND #{primary} > ?) \
     OR #{timestamp} > ?) AND #{timestamp} <= ?",
    time_from,
    min_id,
    time_from,
    time_to
  )
end
|
62
85
|
end
|
63
86
|
end
|
64
87
|
end
|
@@ -340,6 +340,26 @@ module Deimos
|
|
340
340
|
setting :heartbeat_interval
|
341
341
|
end
|
342
342
|
|
343
|
+
setting_object :db_poller do
|
344
|
+
# Producer class to use for the poller.
|
345
|
+
setting :producer_class
|
346
|
+
# How often to run the poller, in seconds. If the poll takes longer than this
|
347
|
+
# time, it will run again immediately and the timeout
|
348
|
+
# will be pushed to the next e.g. 1 minute.
|
349
|
+
setting :run_every, 60
|
350
|
+
# Column to use to find updates. Must have an index on it.
|
351
|
+
setting :timestamp_column, :updated_at
|
352
|
+
# Amount of time, in seconds, to wait before catching updates, to allow transactions
|
353
|
+
# to complete but still pick up the right records.
|
354
|
+
setting :delay_time, 2
|
355
|
+
# If true, dump the full table rather than incremental changes. Should
|
356
|
+
# only be used for very small tables.
|
357
|
+
setting :full_table, false
|
358
|
+
# If false, start from the current time instead of the beginning of time
|
359
|
+
# if this is the first time running the poller.
|
360
|
+
setting :start_from_beginning, true
|
361
|
+
end
|
362
|
+
|
343
363
|
deprecate 'kafka_logger', 'kafka.logger'
|
344
364
|
deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
|
345
365
|
deprecate 'schema_registry_url', 'schema.registry_url'
|
@@ -0,0 +1,149 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/poll_info'
|
4
|
+
require 'deimos/utils/executor'
|
5
|
+
require 'deimos/utils/signal_handler'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module Utils
|
9
|
+
# Class which continually polls the database and sends Kafka messages.
|
10
|
+
class DbPoller
|
11
|
+
BATCH_SIZE = 1000
|
12
|
+
|
13
|
+
# Needed for Executor so it can identify the worker
|
14
|
+
attr_reader :id
|
15
|
+
|
16
|
+
# Begin the DB Poller process: build one poller per configured `db_poller`
# entry, wrap them in an Executor and run it through a SignalHandler.
# @raise [RuntimeError] if no pollers are configured.
def self.start!
  poller_configs = Deimos.config.db_poller_objects
  raise('No pollers configured!') if poller_configs.empty?

  pollers = poller_configs.map { |poller_config| new(poller_config) }
  executor = Deimos::Utils::Executor.new(pollers,
                                         sleep_seconds: 5,
                                         logger: Deimos.config.logger)
  Deimos::Utils::SignalHandler.new(executor).run!
end
|
31
|
+
|
32
|
+
# Builds a poller for a single `db_poller` configuration entry and resolves
# the producer class it names.
# @param config [Deimos::Configuration::ConfigStruct] poller settings; its
# `producer_class` must name a subclass of Deimos::ActiveRecordProducer.
# @raise [RuntimeError] if the producer class cannot be found, or if it is
# not a subclass of Deimos::ActiveRecordProducer.
def initialize(config)
  @config = config
  @id = SecureRandom.hex
  begin
    @producer = @config.producer_class.constantize
  rescue NameError
    raise "Class #{@config.producer_class} not found!"
  end
  unless @producer < Deimos::ActiveRecordProducer
    # @producer is itself a class, so report its own name;
    # `@producer.class.name` would always read "Class".
    raise "Class #{@producer.name} is not an ActiveRecordProducer!"
  end
end
|
45
|
+
|
46
|
+
# Start the poll:
# 1) Grab the current PollInfo from the database indicating the last
# time we ran
# 2) On a loop, process all the recent updates between the last time
# we ran and now, sleeping 0.1 seconds between iterations, until the
# stop flag has been set.
def start
  # Don't send asynchronously
  producers = Deimos.config.producers
  producers.backend = :kafka if producers.backend == :kafka_async

  Deimos.config.logger.info('Starting...')
  @signal_to_stop = false
  retrieve_poll_info
  loop do
    unless @signal_to_stop
      process_updates
      sleep 0.1
      next
    end

    Deimos.config.logger.info('Shutting down')
    break
  end
end
|
68
|
+
|
69
|
+
# Grab the PollInfo row for this poller's producer, creating it if it
# doesn't exist. A new row starts at `Time.new(0)` when
# `start_from_beginning` is set and at the current time otherwise, with a
# `last_sent_id` of 0. The database connection is re-established first.
# @return [Deimos::PollInfo]
def retrieve_poll_info
  ActiveRecord::Base.connection.reconnect!
  producer_name = @config.producer_class
  @info = Deimos::PollInfo.find_by_producer(producer_name)
  return @info if @info

  initial_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
  @info = Deimos::PollInfo.create!(producer: producer_name,
                                   last_sent: initial_time,
                                   last_sent_id: 0)
end
|
78
|
+
|
79
|
+
# Stop the poll by raising the flag that the polling loop checks at the
# top of each iteration.
def stop
  logger = Deimos.config.logger
  logger.info('Received signal to stop')
  @signal_to_stop = true
end
|
84
|
+
|
85
|
+
# Indicate whether this current loop should process updates, i.e. whether
# at least `run_every` seconds (plus `delay_time`) have passed since the
# recorded `last_sent` time. Most loops will busy-wait (sleeping 0.1
# seconds) until it's ready.
# @return [Boolean]
def should_run?
  since_last_sent = Time.zone.now - @info.last_sent
  since_last_sent - @config.delay_time >= @config.run_every
end
|
91
|
+
|
92
|
+
# Reads the configured timestamp column from a record.
# @param record [ActiveRecord::Base]
# @return [ActiveSupport::TimeWithZone]
def last_updated(record)
  timestamp_column = @config.timestamp_column
  record.public_send(timestamp_column)
end
|
97
|
+
|
98
|
+
# Send messages for updated data. Does nothing unless #should_run? is true.
# Otherwise fetches batches of records between the last sent time (or
# `Time.new(0)` when `full_table` is set) and "now minus delay_time",
# hands each batch to #process_batch, and stops once a fetch comes back
# empty.
# NOTE(review): `@info.last_sent` is only advanced inside #process_batch,
# so a poll that finds no records appears to leave #should_run? true for
# the caller's next iteration - confirm this is intended.
def process_updates
  return unless should_run?

  time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
  time_to = Time.zone.now - @config.delay_time
  Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
  message_count = 0
  batch_count = 0

  # poll_query gets all the relevant data from the database, as defined
  # by the producer itself.
  loop do
    Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
    batch = fetch_results(time_from, time_to).to_a
    break if batch.empty?

    batch_count += 1
    process_batch(batch)
    message_count += batch.size
    # The next fetch resumes from the timestamp of the last record sent.
    time_from = last_updated(batch.last)
  end
  Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} batches)")
end
|
122
|
+
|
123
|
+
# Asks the producer's `poll_query` for records in the given window, capped
# at BATCH_SIZE rows and ordered by the timestamp column, then primary key.
# @param time_from [ActiveSupport::TimeWithZone]
# @param time_to [ActiveSupport::TimeWithZone]
# @return [ActiveRecord::Relation]
def fetch_results(time_from, time_to)
  primary_key = @producer.config[:record_class].primary_key
  connection = ActiveRecord::Base.connection
  quoted_timestamp = connection.quote_column_name(@config.timestamp_column)
  quoted_id = connection.quote_column_name(primary_key)
  relation = @producer.poll_query(time_from: time_from,
                                  time_to: time_to,
                                  column_name: @config.timestamp_column,
                                  min_id: @info.last_sent_id)
  relation.limit(BATCH_SIZE).order("#{quoted_timestamp}, #{quoted_id}")
end
|
137
|
+
|
138
|
+
# Sends one batch through the producer, then records the timestamp and
# primary key of the batch's last record on the PollInfo row. Both values
# are read from the record before sending.
# @param batch [Array<ActiveRecord::Base>]
def process_batch(batch)
  record = batch.last
  id_method = record.class.primary_key
  last_id = record.public_send(id_method)
  last_updated_at = last_updated(record)
  @producer.send_events(batch)
  # Assign + save! rather than `update_attributes!`, which is deprecated in
  # Rails 6.0 and removed in 6.1; this form works on every Rails version.
  @info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
  @info.save!
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|