deimos-ruby 1.17.1 → 1.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +9 -0
- data/README.md +19 -0
- data/docs/CONFIGURATION.md +8 -3
- data/lib/deimos/config/configuration.rb +19 -7
- data/lib/deimos/schema_backends/mock.rb +1 -1
- data/lib/deimos/utils/db_poller/base.rb +139 -0
- data/lib/deimos/utils/db_poller/state_based.rb +57 -0
- data/lib/deimos/utils/db_poller/time_based.rb +82 -0
- data/lib/deimos/utils/db_poller.rb +22 -170
- data/lib/deimos/version.rb +1 -1
- data/rbs_collection.lock.yaml +43 -19
- data/sig/defs.rbs +234 -173
- data/spec/utils/db_poller_spec.rb +48 -35
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93b418353fa710fd90425980ec2772f6cb4249b1d4e43c3cadd9fb7a099eb387
|
4
|
+
data.tar.gz: b319ef0e32b2e147b7ec0a3b131d8d8269e9818faa5313b6f56745498a2942c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60bd9c6f8cb7e4a63d791312b2fb784cfed4f2b0c60f679d71f1fff1adeac8ca86cbf285e42bf24fb1e65748fe5c1a0127710925d068c5d6bf6feeb4753ed203
|
7
|
+
data.tar.gz: 9e30a8666f75d62fcee4c5f99de3737af5b58c6bafc9c54f786784785472233dc6a9135c525b288d04320acb716b58274b9dcaeeba90965a04721e0e77588c83
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## UNRELEASED
|
9
9
|
|
10
|
+
# 1.18.0 - 2022-11-01
|
11
|
+
|
12
|
+
### Features :star:
|
13
|
+
|
14
|
+
- Add the `state_based` mode for DB pollers.
|
15
|
+
|
16
|
+
### Fixes :wrench:
|
17
|
+
- Fix the mock schema backend's `encode_key` method so it doesn't crash when used in application code.
|
18
|
+
|
10
19
|
# 1.17.1 - 2022-10-20
|
11
20
|
|
12
21
|
- Fix the log message for publishing messages so it uses the topic of the actual message instead of
|
data/README.md
CHANGED
@@ -803,6 +803,25 @@ end
|
|
803
803
|
Note that the poller will retry infinitely if it encounters a Kafka-related error such
|
804
804
|
as a communication failure. For all other errors, it will retry once by default.
|
805
805
|
|
806
|
+
### State-based pollers
|
807
|
+
|
808
|
+
By default, pollers use timestamps and IDs to determine the records to publish. However, you can
|
809
|
+
set a different mode whereby it will include all records that match your query, and when done,
|
810
|
+
will update a state and/or timestamp column which should remove it from that query. With this
|
811
|
+
algorithm, you can ignore the `updated_at` and `id` columns.
|
812
|
+
|
813
|
+
To configure a state-based poller:
|
814
|
+
|
815
|
+
```ruby
|
816
|
+
db_poller do
|
817
|
+
mode :state_based
|
818
|
+
state_column :publish_state # the name of the column to update state to
|
819
|
+
publish_timestamp_column :published_at # the column to update when publishing succeeds
|
820
|
+
published_state 'published' # the value to put into the state_column when publishing succeeds
|
821
|
+
failed_state 'publish_failed' # the value to put into the state_column when publishing fails
|
822
|
+
end
|
823
|
+
```
|
824
|
+
|
806
825
|
## Running consumers
|
807
826
|
|
808
827
|
Deimos includes a rake task. Once it's in your gemfile, just run
|
data/docs/CONFIGURATION.md
CHANGED
@@ -112,14 +112,19 @@ end
|
|
112
112
|
```
|
113
113
|
|
114
114
|
Config name|Default|Description
|
115
|
-
|
115
|
+
-----------|--|-----------
|
116
116
|
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
117
|
+
mode|:time_based|Whether to use time-based polling or state-based polling.
|
117
118
|
run_every|60|Amount of time in seconds to wait between runs.
|
118
119
|
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
119
120
|
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
120
|
-
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
121
|
-
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
122
121
|
retries|1|The number of times to retry for a *non-Kafka* error.
|
122
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only.
|
123
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only.
|
124
|
+
state_column|nil|If set, this represents the DB column to use to update publishing status. State-based only.
|
125
|
+
publish_timestamp_column|nil|If set, this represents the DB column to use to update when publishing is done. State-based only.
|
126
|
+
published_state|nil|If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only.
|
127
|
+
failed_state|nil|If set, the poller will update the `state_column` to this value when publishing fails. State-based only.
|
123
128
|
|
124
129
|
## Kafka Configuration
|
125
130
|
|
@@ -444,25 +444,37 @@ module Deimos
|
|
444
444
|
end
|
445
445
|
|
446
446
|
setting_object :db_poller do
|
447
|
+
# Mode to use for querying - :time_based (via updated_at) or :state_based.
|
448
|
+
setting :mode, :time_based
|
447
449
|
# Producer class to use for the poller.
|
448
450
|
setting :producer_class
|
449
451
|
# How often to run the poller, in seconds. If the poll takes longer than this
|
450
452
|
# time, it will run again immediately and the timeout
|
451
453
|
# will be pushed to the next e.g. 1 minute.
|
452
454
|
setting :run_every, 60
|
453
|
-
#
|
454
|
-
setting :
|
455
|
+
# The number of times to retry production when encountering a *non-Kafka* error.
|
456
|
+
setting :retries, 1
|
455
457
|
# Amount of time, in seconds, to wait before catching updates, to allow transactions
|
456
|
-
# to complete but still pick up the right records.
|
458
|
+
# to complete but still pick up the right records. Should only be set for time-based mode.
|
457
459
|
setting :delay_time, 2
|
460
|
+
# Column to use to find updates. Must have an index on it.
|
461
|
+
setting :timestamp_column, :updated_at
|
462
|
+
|
458
463
|
# If true, dump the full table rather than incremental changes. Should
|
459
|
-
# only be used for very small tables.
|
464
|
+
# only be used for very small tables. Time-based only.
|
460
465
|
setting :full_table, false
|
461
466
|
# If false, start from the current time instead of the beginning of time
|
462
|
-
# if this is the first time running the poller.
|
467
|
+
# if this is the first time running the poller. Time-based only.
|
463
468
|
setting :start_from_beginning, true
|
464
|
-
|
465
|
-
|
469
|
+
|
470
|
+
# Column to set once publishing is complete - state-based only.
|
471
|
+
setting :state_column
|
472
|
+
# Column to update with e.g. published_at. State-based only.
|
473
|
+
setting :publish_timestamp_column
|
474
|
+
# Value to set the state_column to once published - state-based only.
|
475
|
+
setting :published_state
|
476
|
+
# Value to set the state_column to if publishing fails - state-based only.
|
477
|
+
setting :failed_state
|
466
478
|
end
|
467
479
|
|
468
480
|
deprecate 'kafka_logger', 'kafka.logger'
|
@@ -0,0 +1,139 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller'
|
4
|
+
require 'deimos/poll_info'
|
5
|
+
require 'sigurd'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module Utils
|
9
|
+
# Namespace for the classes which continually poll the database and send Kafka messages.
|
10
|
+
module DbPoller
|
11
|
+
# Base poller class for retrieving and publishing messages.
|
12
|
+
class Base
  # Maximum number of records fetched per query by the subclasses.
  # @return [Integer]
  BATCH_SIZE = 1000

  # Needed for Executor so it can identify the worker.
  # Generated with SecureRandom.hex, so this is a hex string.
  # @return [String]
  attr_reader :id

  # The poller settings this instance was created with.
  # @return [FigTree::ConfigStruct]
  attr_reader :config

  # Resolve the configured producer class and verify that it is an
  # ActiveRecordProducer.
  # @param config [FigTree::ConfigStruct]
  # @raise [RuntimeError] if the producer class cannot be resolved or does
  #   not inherit from Deimos::ActiveRecordProducer.
  def initialize(config)
    @config = config
    @id = SecureRandom.hex
    begin
      @producer = @config.producer_class.constantize
    rescue NameError
      raise "Class #{@config.producer_class} not found!"
    end
    # @producer is itself a class, so report its own name; `.class.name`
    # would always print "Class".
    unless @producer < Deimos::ActiveRecordProducer
      raise "Class #{@producer.name} is not an ActiveRecordProducer!"
    end
  end

  # Start the poll:
  # 1) Grab the current PollInfo from the database indicating the last
  # time we ran
  # 2) On a loop, process all the recent updates between the last time
  # we ran and now.
  # The loop exits once #stop has been called.
  # @return [void]
  def start
    # Don't send asynchronously. Note that this changes the global
    # producer backend setting, not just this poller's.
    if Deimos.config.producers.backend == :kafka_async
      Deimos.config.producers.backend = :kafka
    end
    Deimos.config.logger.info('Starting...')
    @signal_to_stop = false
    # Skip the reconnect while a transaction is open on this connection.
    ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?

    retrieve_poll_info
    loop do
      if @signal_to_stop
        Deimos.config.logger.info('Shutting down')
        break
      end
      process_updates if should_run?
      sleep(0.1)
    end
  end

  # Grab the PollInfo or create if it doesn't exist.
  # @return [void]
  def retrieve_poll_info
    @info = Deimos::PollInfo.find_by_producer(@config.producer_class) || create_poll_info
  end

  # Create the PollInfo row for this producer, starting from the
  # beginning of time. Subclasses may override this.
  # @return [Deimos::PollInfo]
  def create_poll_info
    Deimos::PollInfo.create!(producer: @config.producer_class, last_sent: Time.new(0))
  end

  # Indicate whether this current loop should process updates. Most loops
  # will busy-wait (sleeping 0.1 seconds) until it's ready.
  # True once at least `run_every` seconds have passed since
  # `last_sent` plus `delay_time`.
  # @return [Boolean]
  def should_run?
    Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
  end

  # Stop the poll. Only sets a flag; the loop in #start checks it on its
  # next iteration.
  # @return [void]
  def stop
    Deimos.config.logger.info('Received signal to stop')
    @signal_to_stop = true
  end

  # Send messages for updated data. Must be implemented by subclasses.
  # @raise [Deimos::MissingImplementationError] always, in this base class.
  # @return [void]
  def process_updates
    raise Deimos::MissingImplementationError
  end

  # Publish one batch inside a tracing span, retrying on failure.
  # Kafka errors are retried indefinitely; any other StandardError is
  # retried up to `config.retries` times before the batch is given up on.
  # @param batch [Array<ActiveRecord::Base>]
  # @param status [PollStatus] counters updated in place.
  # @return [Boolean] true if the batch was published, false if it was
  #   abandoned after exhausting the retries.
  def process_batch_with_span(batch, status)
    retries = 0
    begin
      # NOTE(review): a new span is started on every attempt and spans of
      # failed attempts are never finished - confirm this is acceptable for
      # the configured tracer.
      span = Deimos.config.tracer&.start(
        'deimos-db-poller',
        # @producer is a class, so use its own name as the resource.
        resource: @producer.name.gsub('::', '-')
      )
      process_batch(batch)
      Deimos.config.tracer&.finish(span)
      status.batches_processed += 1
    rescue Kafka::Error => e # keep trying till it fixes itself
      Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}")
      sleep(0.5)
      retry
    rescue StandardError => e
      Deimos.config.logger.error("Error publishing through DB poller: #{e.message}")
      if retries < @config.retries
        retries += 1
        sleep(0.5)
        retry
      else
        Deimos.config.logger.error('Retries exceeded, moving on to next batch')
        Deimos.config.tracer&.set_error(span, e)
        status.batches_errored += 1
        return false
      end
    ensure
      # Count the batch's records whether it was published or abandoned.
      status.messages_processed += batch.size
    end
    true
  end

  # Hand the batch to the producer for publishing.
  # @param batch [Array<ActiveRecord::Base>]
  # @return [void]
  def process_batch(batch)
    @producer.send_events(batch)
  end
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller/base'
|
4
|
+
|
5
|
+
module Deimos
|
6
|
+
module Utils
|
7
|
+
module DbPoller
|
8
|
+
# Poller that uses state columns to determine the records to publish.
|
9
|
+
class StateBased < Base
  # Send messages for updated data. Keeps fetching and publishing batches
  # until the producer's poll query returns no more records.
  # NOTE(review): this relies on #finalize_batch changing the records so
  # that they drop out of the producer's `poll_query`; otherwise the loop
  # would keep fetching them - confirm against the producer's query.
  # @return [void]
  def process_updates
    Deimos.config.logger.info("Polling #{@producer.topic}")
    status = PollStatus.new(0, 0, 0)

    # poll_query gets all the relevant data from the database, as defined
    # by the producer itself.
    loop do
      Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{status.current_batch}")
      batch = fetch_results.to_a
      break if batch.empty?

      success = process_batch_with_span(batch, status)
      finalize_batch(batch, success)
    end
    Deimos.config.logger.info("Poll #{@producer.topic} complete (#{status.report})")
  end

  # The next batch of records to publish: the producer's poll query,
  # capped at BATCH_SIZE and ordered by the configured timestamp column.
  # @return [ActiveRecord::Relation]
  def fetch_results
    @producer.poll_query.limit(BATCH_SIZE).order(@config.timestamp_column)
  end

  # Record the outcome of a batch: bump the poll info and write the
  # state/timestamp columns on every record of the batch in one UPDATE.
  # @param batch [Array<ActiveRecord::Base>]
  # @param success [Boolean] whether the batch was published.
  # @return [void]
  def finalize_batch(batch, success)
    # Base#should_run? reads `last_sent`, so that column has to be touched
    # explicitly; a bare `touch` leaves it unchanged.
    @info.touch(:last_sent)
    return if batch.empty?

    state = success ? @config.published_state : @config.failed_state
    klass = batch.first.class
    id_col = klass.primary_key
    timestamp_col = @config.timestamp_column

    attrs = { timestamp_col => Time.zone.now }
    attrs[@config.state_column] = state if state
    if @config.publish_timestamp_column
      attrs[@config.publish_timestamp_column] = Time.zone.now
    end

    # primary_key is a String, so read it via public_send rather than
    # passing it as a block with `&`.
    ids = batch.map { |record| record.public_send(id_col) }
    klass.where(id_col => ids).update_all(attrs)
  end

end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller/base'
|
4
|
+
|
5
|
+
module Deimos
|
6
|
+
module Utils
|
7
|
+
module DbPoller
|
8
|
+
# Poller that uses ID and updated_at to determine the records to publish.
|
9
|
+
class TimeBased < Base

  # Build the first PollInfo row for this producer. Depending on the
  # `start_from_beginning` setting the poller starts either at the
  # beginning of time or at the current time.
  # @return [Deimos::PollInfo]
  def create_poll_info
    starting_point = if @config.start_from_beginning
                       Time.new(0)
                     else
                       Time.zone.now
                     end
    Deimos::PollInfo.create!(
      producer: @config.producer_class,
      last_sent: starting_point,
      last_sent_id: 0
    )
  end

  # Publish a batch, then move the poll info forward to the batch's last
  # record. The poll info is advanced whether or not publishing succeeded.
  # @param batch [Array<ActiveRecord::Base>]
  # @param status [Deimos::Utils::DbPoller::PollStatus]
  def process_and_touch_info(batch, status)
    process_batch_with_span(batch, status)
    touch_info(batch)
  end

  # Send messages for updated data: publish every record whose timestamp
  # falls between the last sent time (or the beginning of time in
  # full-table mode) and "now minus delay_time", one batch at a time.
  # @return [void]
  def process_updates
    lower_bound = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
    upper_bound = Time.zone.now - @config.delay_time
    Deimos.config.logger.info("Polling #{@producer.topic} from #{lower_bound} to #{upper_bound}")
    progress = PollStatus.new(0, 0, 0)

    # The producer's poll_query decides which records are relevant; keep
    # asking for the next batch until nothing is left in the window.
    loop do
      Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{progress.current_batch}")
      records = fetch_results(lower_bound, upper_bound).to_a
      break if records.empty?

      process_and_touch_info(records, progress)
      lower_bound = last_updated(records.last)
    end
    Deimos.config.logger.info("Poll #{@producer.topic} complete at #{upper_bound} (#{progress.report})")
  end

  # Query for the next batch inside the given time window, ordered by the
  # timestamp column and then by primary key, capped at BATCH_SIZE.
  # @param time_from [ActiveSupport::TimeWithZone]
  # @param time_to [ActiveSupport::TimeWithZone]
  # @return [ActiveRecord::Relation]
  def fetch_results(time_from, time_to)
    pk_column = @producer.config[:record_class].primary_key
    timestamp_sql = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
    pk_sql = ActiveRecord::Base.connection.quote_column_name(pk_column)
    scope = @producer.poll_query(
      time_from: time_from,
      time_to: time_to,
      column_name: @config.timestamp_column,
      min_id: @info.last_sent_id
    )
    scope.limit(BATCH_SIZE).order("#{timestamp_sql}, #{pk_sql}")
  end

  # Read the configured timestamp column from a record.
  # @param record [ActiveRecord::Base]
  # @return [ActiveSupport::TimeWithZone]
  def last_updated(record)
    record.public_send(@config.timestamp_column)
  end

  # Persist the timestamp and primary key of the batch's last record as
  # the new `last_sent` / `last_sent_id` of the poll info.
  # @param batch [Array<ActiveRecord::Base>]
  # @return [void]
  def touch_info(batch)
    newest = batch.last
    newest_id = newest.public_send(newest.class.primary_key)
    @info.attributes = { last_sent: last_updated(newest), last_sent_id: newest_id }
    @info.save!
  end

end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -1,22 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'deimos/poll_info'
|
4
|
-
require 'sigurd'
|
5
|
-
|
6
3
|
module Deimos
|
7
4
|
module Utils
|
8
|
-
#
|
9
|
-
|
10
|
-
# @return [Integer]
|
11
|
-
BATCH_SIZE = 1000
|
12
|
-
|
13
|
-
# Needed for Executor so it can identify the worker
|
14
|
-
# @return [Integer]
|
15
|
-
attr_reader :id
|
16
|
-
|
17
|
-
# @return [Hash]
|
18
|
-
attr_reader :config
|
19
|
-
|
5
|
+
# Overall functionality related to DB poller.
|
6
|
+
module DbPoller
|
20
7
|
# Begin the DB Poller process.
|
21
8
|
# @return [void]
|
22
9
|
def self.start!
|
@@ -25,7 +12,7 @@ module Deimos
|
|
25
12
|
end
|
26
13
|
|
27
14
|
pollers = Deimos.config.db_poller_objects.map do |poller_config|
|
28
|
-
self.new(poller_config)
|
15
|
+
self.class_for_config(poller_config.mode).new(poller_config)
|
29
16
|
end
|
30
17
|
executor = Sigurd::Executor.new(pollers,
|
31
18
|
sleep_seconds: 5,
|
@@ -34,168 +21,33 @@ module Deimos
|
|
34
21
|
signal_handler.run!
|
35
22
|
end
|
36
23
|
|
37
|
-
# @param
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
end
|
46
|
-
unless @producer < Deimos::ActiveRecordProducer
|
47
|
-
raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
# Start the poll:
|
52
|
-
# 1) Grab the current PollInfo from the database indicating the last
|
53
|
-
# time we ran
|
54
|
-
# 2) On a loop, process all the recent updates between the last time
|
55
|
-
# we ran and now.
|
56
|
-
# @return [void]
|
57
|
-
def start
|
58
|
-
# Don't send asynchronously
|
59
|
-
if Deimos.config.producers.backend == :kafka_async
|
60
|
-
Deimos.config.producers.backend = :kafka
|
24
|
+
# @param config_name [Symbol]
|
25
|
+
# @return [Class<Deimos::Utils::DbPoller::Base>]
|
26
|
+
def self.class_for_config(config_name)
|
27
|
+
case config_name
|
28
|
+
when :state_based
|
29
|
+
Deimos::Utils::DbPoller::StateBased
|
30
|
+
else
|
31
|
+
Deimos::Utils::DbPoller::TimeBased
|
61
32
|
end
|
62
|
-
Deimos.config.logger.info('Starting...')
|
63
|
-
@signal_to_stop = false
|
64
|
-
retrieve_poll_info
|
65
|
-
loop do
|
66
|
-
if @signal_to_stop
|
67
|
-
Deimos.config.logger.info('Shutting down')
|
68
|
-
break
|
69
|
-
end
|
70
|
-
process_updates
|
71
|
-
sleep 0.1
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
# Grab the PollInfo or create if it doesn't exist.
|
76
|
-
# @return [void]
|
77
|
-
def retrieve_poll_info
|
78
|
-
ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?
|
79
|
-
new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
|
80
|
-
@info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
|
81
|
-
Deimos::PollInfo.create!(producer: @config.producer_class,
|
82
|
-
last_sent: new_time,
|
83
|
-
last_sent_id: 0)
|
84
|
-
end
|
85
|
-
|
86
|
-
# Stop the poll.
|
87
|
-
# @return [void]
|
88
|
-
def stop
|
89
|
-
Deimos.config.logger.info('Received signal to stop')
|
90
|
-
@signal_to_stop = true
|
91
|
-
end
|
92
|
-
|
93
|
-
# Indicate whether this current loop should process updates. Most loops
|
94
|
-
# will busy-wait (sleeping 0.1 seconds) until it's ready.
|
95
|
-
# @return [Boolean]
|
96
|
-
def should_run?
|
97
|
-
Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
|
98
|
-
end
|
99
|
-
|
100
|
-
# @param record [ActiveRecord::Base]
|
101
|
-
# @return [ActiveSupport::TimeWithZone]
|
102
|
-
def last_updated(record)
|
103
|
-
record.public_send(@config.timestamp_column)
|
104
33
|
end
|
105
34
|
|
106
|
-
|
107
|
-
# @return [void]
|
108
|
-
def process_updates
|
109
|
-
return unless should_run?
|
110
|
-
|
111
|
-
time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
|
112
|
-
time_to = Time.zone.now - @config.delay_time
|
113
|
-
Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
|
114
|
-
message_count = 0
|
115
|
-
batch_count = 0
|
116
|
-
error_count = 0
|
35
|
+
PollStatus = Struct.new(:batches_processed, :batches_errored, :messages_processed) do
|
117
36
|
|
118
|
-
#
|
119
|
-
|
120
|
-
|
121
|
-
Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
|
122
|
-
batch = fetch_results(time_from, time_to).to_a
|
123
|
-
break if batch.empty?
|
124
|
-
|
125
|
-
if process_batch_with_span(batch)
|
126
|
-
batch_count += 1
|
127
|
-
else
|
128
|
-
error_count += 1
|
129
|
-
end
|
130
|
-
message_count += batch.size
|
131
|
-
time_from = last_updated(batch.last)
|
37
|
+
# @return [Integer]
|
38
|
+
def current_batch
|
39
|
+
batches_processed + 1
|
132
40
|
end
|
133
|
-
Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} successful batches, #{error_count} batches errored}")
|
134
|
-
end
|
135
|
-
|
136
|
-
# @param time_from [ActiveSupport::TimeWithZone]
|
137
|
-
# @param time_to [ActiveSupport::TimeWithZone]
|
138
|
-
# @return [ActiveRecord::Relation]
|
139
|
-
def fetch_results(time_from, time_to)
|
140
|
-
id = @producer.config[:record_class].primary_key
|
141
|
-
quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
|
142
|
-
quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
|
143
|
-
@producer.poll_query(time_from: time_from,
|
144
|
-
time_to: time_to,
|
145
|
-
column_name: @config.timestamp_column,
|
146
|
-
min_id: @info.last_sent_id).
|
147
|
-
limit(BATCH_SIZE).
|
148
|
-
order("#{quoted_timestamp}, #{quoted_id}")
|
149
|
-
end
|
150
41
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
retries = 0
|
155
|
-
begin
|
156
|
-
span = Deimos.config.tracer&.start(
|
157
|
-
'deimos-db-poller',
|
158
|
-
resource: @producer.class.name.gsub('::', '-')
|
159
|
-
)
|
160
|
-
process_batch(batch)
|
161
|
-
Deimos.config.tracer&.finish(span)
|
162
|
-
rescue Kafka::Error => e # keep trying till it fixes itself
|
163
|
-
Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}")
|
164
|
-
sleep(0.5)
|
165
|
-
retry
|
166
|
-
rescue StandardError => e
|
167
|
-
Deimos.config.logger.error("Error publishing through DB poller: #{e.message}}")
|
168
|
-
if retries < @config.retries
|
169
|
-
retries += 1
|
170
|
-
sleep(0.5)
|
171
|
-
retry
|
172
|
-
else
|
173
|
-
Deimos.config.logger.error('Retries exceeded, moving on to next batch')
|
174
|
-
Deimos.config.tracer&.set_error(span, e)
|
175
|
-
self.touch_info(batch)
|
176
|
-
return false
|
177
|
-
end
|
42
|
+
# @return [String]
|
43
|
+
def report
|
44
|
+
"#{batches_processed} batches, #{batches_errored} errored batches, #{messages_processed} processed messages"
|
178
45
|
end
|
179
|
-
true
|
180
|
-
end
|
181
|
-
|
182
|
-
# @param batch [Array<ActiveRecord::Base>]
|
183
|
-
# @return [void]
|
184
|
-
def touch_info(batch)
|
185
|
-
record = batch.last
|
186
|
-
id_method = record.class.primary_key
|
187
|
-
last_id = record.public_send(id_method)
|
188
|
-
last_updated_at = last_updated(record)
|
189
|
-
@info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
|
190
|
-
@info.save!
|
191
|
-
end
|
192
|
-
|
193
|
-
# @param batch [Array<ActiveRecord::Base>]
|
194
|
-
# @return [void]
|
195
|
-
def process_batch(batch)
|
196
|
-
@producer.send_events(batch)
|
197
|
-
self.touch_info(batch)
|
198
46
|
end
|
199
47
|
end
|
200
48
|
end
|
201
49
|
end
|
50
|
+
|
51
|
+
require 'deimos/utils/db_poller/base'
|
52
|
+
require 'deimos/utils/db_poller/time_based'
|
53
|
+
require 'deimos/utils/db_poller/state_based'
|
data/lib/deimos/version.rb
CHANGED