deimos-ruby 1.17.1 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +9 -0
- data/README.md +19 -0
- data/docs/CONFIGURATION.md +8 -3
- data/lib/deimos/config/configuration.rb +19 -7
- data/lib/deimos/schema_backends/mock.rb +1 -1
- data/lib/deimos/utils/db_poller/base.rb +139 -0
- data/lib/deimos/utils/db_poller/state_based.rb +57 -0
- data/lib/deimos/utils/db_poller/time_based.rb +82 -0
- data/lib/deimos/utils/db_poller.rb +22 -170
- data/lib/deimos/version.rb +1 -1
- data/rbs_collection.lock.yaml +43 -19
- data/sig/defs.rbs +234 -173
- data/spec/utils/db_poller_spec.rb +48 -35
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93b418353fa710fd90425980ec2772f6cb4249b1d4e43c3cadd9fb7a099eb387
|
4
|
+
data.tar.gz: b319ef0e32b2e147b7ec0a3b131d8d8269e9818faa5313b6f56745498a2942c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60bd9c6f8cb7e4a63d791312b2fb784cfed4f2b0c60f679d71f1fff1adeac8ca86cbf285e42bf24fb1e65748fe5c1a0127710925d068c5d6bf6feeb4753ed203
|
7
|
+
data.tar.gz: 9e30a8666f75d62fcee4c5f99de3737af5b58c6bafc9c54f786784785472233dc6a9135c525b288d04320acb716b58274b9dcaeeba90965a04721e0e77588c83
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## UNRELEASED
|
9
9
|
|
10
|
+
# 1.18.0 - 2022-11-01
|
11
|
+
|
12
|
+
### Features :star:
|
13
|
+
|
14
|
+
- Add the `state_based` mode for DB pollers.
|
15
|
+
|
16
|
+
### Fixes :wrench:
|
17
|
+
- Fix the mock schema backend's `encode_key` method so it doesn't crash when used in application code.
|
18
|
+
|
10
19
|
# 1.17.1 - 2022-10-20
|
11
20
|
|
12
21
|
- Fix the log message for publishing messages so it uses the topic of the actual message instead of
|
data/README.md
CHANGED
@@ -803,6 +803,25 @@ end
|
|
803
803
|
Note that the poller will retry infinitely if it encounters a Kafka-related error such
|
804
804
|
as a communication failure. For all other errors, it will retry once by default.
|
805
805
|
|
806
|
+
### State-based pollers
|
807
|
+
|
808
|
+
By default, pollers use timestamps and IDs to determine the records to publish. However, you can
|
809
|
+
set a different mode whereby it will include all records that match your query, and when done,
|
810
|
+
will update a state and/or timestamp column which should remove it from that query. With this
|
811
|
+
algorithm, you can ignore the `updated_at` and `id` columns.
|
812
|
+
|
813
|
+
To configure a state-based poller:
|
814
|
+
|
815
|
+
```ruby
|
816
|
+
db_poller do
|
817
|
+
mode :state_based
|
818
|
+
state_column :publish_state # the name of the column to update state to
|
819
|
+
publish_timestamp_column :published_at # the column to update when publishing succeeds
|
820
|
+
published_state 'published' # the value to put into the state_column when publishing succeeds
|
821
|
+
failed_state 'publish_failed' # the value to put into the state_column when publishing fails
|
822
|
+
end
|
823
|
+
```
|
824
|
+
|
806
825
|
## Running consumers
|
807
826
|
|
808
827
|
Deimos includes a rake task. Once it's in your gemfile, just run
|
data/docs/CONFIGURATION.md
CHANGED
@@ -112,14 +112,19 @@ end
|
|
112
112
|
```
|
113
113
|
|
114
114
|
Config name|Default|Description
|
115
|
-
|
115
|
+
-----------|--|-----------
|
116
116
|
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
117
|
+
mode|:time_based|Whether to use time-based polling or state-based polling.
|
117
118
|
run_every|60|Amount of time in seconds to wait between runs.
|
118
119
|
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
119
120
|
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
120
|
-
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
121
|
-
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
122
121
|
retries|1|The number of times to retry for a *non-Kafka* error.
|
122
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only.
|
123
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only.
|
124
|
+
state_column|nil|If set, this represents the DB column to use to update publishing status. State-based only.
|
125
|
+
publish_timestamp_column|nil|If set, this represents the DB column to use to update when publishing is done. State-based only.
|
126
|
+
published_state|nil|If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only.
|
127
|
+
failed_state|nil|If set, the poller will update the `state_column` to this value when publishing fails. State-based only.
|
123
128
|
|
124
129
|
## Kafka Configuration
|
125
130
|
|
@@ -444,25 +444,37 @@ module Deimos
|
|
444
444
|
end
|
445
445
|
|
446
446
|
setting_object :db_poller do
|
447
|
+
# Mode to use for querying - :time_based (via updated_at) or :state_based.
|
448
|
+
setting :mode, :time_based
|
447
449
|
# Producer class to use for the poller.
|
448
450
|
setting :producer_class
|
449
451
|
# How often to run the poller, in seconds. If the poll takes longer than this
|
450
452
|
# time, it will run again immediately and the timeout
|
451
453
|
# will be pushed to the next e.g. 1 minute.
|
452
454
|
setting :run_every, 60
|
453
|
-
#
|
454
|
-
setting :
|
455
|
+
# The number of times to retry production when encountering a *non-Kafka* error.
|
456
|
+
setting :retries, 1
|
455
457
|
# Amount of time, in seconds, to wait before catching updates, to allow transactions
|
456
|
-
# to complete but still pick up the right records.
|
458
|
+
# to complete but still pick up the right records. Should only be set for time-based mode.
|
457
459
|
setting :delay_time, 2
|
460
|
+
# Column to use to find updates. Must have an index on it.
|
461
|
+
setting :timestamp_column, :updated_at
|
462
|
+
|
458
463
|
# If true, dump the full table rather than incremental changes. Should
|
459
|
-
# only be used for very small tables.
|
464
|
+
# only be used for very small tables. Time-based only.
|
460
465
|
setting :full_table, false
|
461
466
|
# If false, start from the current time instead of the beginning of time
|
462
|
-
# if this is the first time running the poller.
|
467
|
+
# if this is the first time running the poller. Time-based only.
|
463
468
|
setting :start_from_beginning, true
|
464
|
-
|
465
|
-
|
469
|
+
|
470
|
+
# Column to set once publishing is complete - state-based only.
|
471
|
+
setting :state_column
|
472
|
+
# Column to update with e.g. published_at. State-based only.
|
473
|
+
setting :publish_timestamp_column
|
474
|
+
# Value to set the state_column to once published - state-based only.
|
475
|
+
setting :published_state
|
476
|
+
# Value to set the state_column to if publishing fails - state-based only.
|
477
|
+
setting :failed_state
|
466
478
|
end
|
467
479
|
|
468
480
|
deprecate 'kafka_logger', 'kafka.logger'
|
@@ -0,0 +1,139 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller'
|
4
|
+
require 'deimos/poll_info'
|
5
|
+
require 'sigurd'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module Utils
|
9
|
+
# Namespace for the classes which continually poll the database and send Kafka messages.
|
10
|
+
module DbPoller
|
11
|
+
# Base poller class for retrieving and publishing messages.
|
12
|
+
class Base
|
13
|
+
|
14
|
+
# @return [Integer]
|
15
|
+
BATCH_SIZE = 1000
|
16
|
+
|
17
|
+
# Needed for Executor so it can identify the worker
|
18
|
+
# @return [Integer]
|
19
|
+
attr_reader :id
|
20
|
+
|
21
|
+
# @return [Hash]
|
22
|
+
attr_reader :config
|
23
|
+
|
24
|
+
# @param config [FigTree::ConfigStruct]
|
25
|
+
def initialize(config)
  @config = config
  # Random hex identifier for this poller instance.
  @id = SecureRandom.hex
  # producer_class is configured as a class name; resolve it to the class.
  begin
    @producer = @config.producer_class.constantize
  rescue NameError
    raise "Class #{@config.producer_class} not found!"
  end
  unless @producer < Deimos::ActiveRecordProducer
    # @producer is itself a Class, so report its own name;
    # `@producer.class.name` would always read "Class".
    raise "Class #{@producer.name} is not an ActiveRecordProducer!"
  end
end
|
37
|
+
|
38
|
+
# Start the poll:
|
39
|
+
# 1) Grab the current PollInfo from the database indicating the last
|
40
|
+
# time we ran
|
41
|
+
# 2) On a loop, process all the recent updates between the last time
|
42
|
+
# we ran and now.
|
43
|
+
# @return [void]
|
44
|
+
def start
  # The poller must publish synchronously, so downgrade the async backend.
  producers = Deimos.config.producers
  producers.backend = :kafka if producers.backend == :kafka_async

  Deimos.config.logger.info('Starting...')
  @signal_to_stop = false

  # Only reconnect when no transaction is currently open on the connection.
  connection = ActiveRecord::Base.connection
  connection.reconnect! unless connection.open_transactions.positive?

  retrieve_poll_info
  # Busy-wait in 0.1s steps until #stop flips the flag.
  until @signal_to_stop
    process_updates if should_run?
    sleep(0.1)
  end
  Deimos.config.logger.info('Shutting down')
end
|
63
|
+
|
64
|
+
# @return [void]
|
65
|
+
# Grab the PollInfo or create if it doesn't exist.
|
66
|
+
# @return [void]
|
67
|
+
def retrieve_poll_info
  # Reuse the stored PollInfo for this producer, creating one on first run.
  existing = Deimos::PollInfo.find_by_producer(@config.producer_class)
  @info = existing || create_poll_info
end
|
70
|
+
|
71
|
+
# @return [Deimos::PollInfo]
|
72
|
+
def create_poll_info
  # Time.new(0) is the earliest starting point used for a brand new poller.
  attributes = { producer: @config.producer_class, last_sent: Time.new(0) }
  Deimos::PollInfo.create!(attributes)
end
|
75
|
+
|
76
|
+
# Indicate whether this current loop should process updates. Most loops
|
77
|
+
# will busy-wait (sleeping 0.1 seconds) until it's ready.
|
78
|
+
# @return [Boolean]
|
79
|
+
def should_run?
  # Seconds since the last recorded send, less the configured delay, must
  # reach the run_every interval before another poll is due.
  since_last_send = Time.zone.now - @info.last_sent
  since_last_send - @config.delay_time >= @config.run_every
end
|
82
|
+
|
83
|
+
# Stop the poll.
|
84
|
+
# @return [void]
|
85
|
+
def stop
  logger = Deimos.config.logger
  logger.info('Received signal to stop')
  # The #start loop checks this flag on every iteration.
  @signal_to_stop = true
end
|
89
|
+
|
90
|
+
# Send messages for updated data.
|
91
|
+
# @return [void]
|
92
|
+
def process_updates
  # Abstract hook: the base class has no polling strategy of its own.
  raise Deimos::MissingImplementationError
end
|
95
|
+
|
96
|
+
# @param batch [Array<ActiveRecord::Base>]
|
97
|
+
# @param status [PollStatus]
|
98
|
+
# @return [Boolean]
|
99
|
+
def process_batch_with_span(batch, status)
  # Counts retries for non-Kafka errors only; Kafka errors retry without limit.
  retries = 0
  begin
    # NOTE(review): @producer looks like a class here, so `@producer.class.name`
    # would presumably always be "Class" — confirm whether `@producer.name`
    # was intended for the trace resource.
    span = Deimos.config.tracer&.start(
      'deimos-db-poller',
      resource: @producer.class.name.gsub('::', '-')
    )
    process_batch(batch)
    Deimos.config.tracer&.finish(span)
    status.batches_processed += 1
  rescue Kafka::Error => e # keep trying till it fixes itself
    Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}")
    sleep(0.5)
    retry
  rescue StandardError => e
    # Fixed: the message previously ended with a stray "}".
    Deimos.config.logger.error("Error publishing through DB poller: #{e.message}")
    if retries < @config.retries
      retries += 1
      sleep(0.5)
      retry
    else
      Deimos.config.logger.error('Retries exceeded, moving on to next batch')
      Deimos.config.tracer&.set_error(span, e)
      status.batches_errored += 1
      return false
    end
  ensure
    # Counted for both successful and errored batches.
    status.messages_processed += batch.size
  end
  true
end
|
130
|
+
|
131
|
+
# @param batch [Array<ActiveRecord::Base>]
|
132
|
+
# @return [void]
|
133
|
+
def process_batch(batch)
  # Hand the whole batch of records to the configured producer class.
  @producer.send_events(batch)
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller/base'
|
4
|
+
|
5
|
+
module Deimos
|
6
|
+
module Utils
|
7
|
+
module DbPoller
|
8
|
+
# Poller that uses state columns to determine the records to publish.
|
9
|
+
class StateBased < Base
|
10
|
+
# Send messages for updated data.
|
11
|
+
# @return [void]
|
12
|
+
def process_updates
  Deimos.config.logger.info("Polling #{@producer.topic}")
  status = PollStatus.new(0, 0, 0)

  # Keep fetching until the query returns nothing. Each batch is finalized
  # after publishing, which updates its state/timestamp columns.
  loop do
    Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{status.current_batch}")
    batch = fetch_results.to_a
    break if batch.empty?

    success = process_batch_with_span(batch, status)
    finalize_batch(batch, success)
  end
  # Fixed: the closing parenthesis was missing from this message.
  Deimos.config.logger.info("Poll #{@producer.topic} complete (#{status.report})")
end
|
28
|
+
|
29
|
+
# @return [ActiveRecord::Relation]
|
30
|
+
def fetch_results
  # The producer's own poll_query decides which records are pending; this
  # only caps the batch size and orders by the configured timestamp column.
  pending = @producer.poll_query
  pending.limit(BATCH_SIZE).order(@config.timestamp_column)
end
|
33
|
+
|
34
|
+
# @param batch [Array<ActiveRecord::Base>]
|
35
|
+
# @param success [Boolean]
|
36
|
+
# @return [void]
|
37
|
+
def finalize_batch(batch, success)
  # Stamp last_sent explicitly: should_run? measures the run_every interval
  # from @info.last_sent, which a bare `touch` would not update.
  @info.touch(:last_sent)

  state = success ? @config.published_state : @config.failed_state
  klass = batch.first.class
  # Fixed: this previously read `record.class`, but no `record` exists here.
  id_col = klass.primary_key
  timestamp_col = @config.timestamp_column

  attrs = { timestamp_col => Time.zone.now }
  # Only write the state column when a value is configured for this outcome.
  attrs[@config.state_column] = state if state
  if @config.publish_timestamp_column
    attrs[@config.publish_timestamp_column] = Time.zone.now
  end

  # Fixed: read each id via public_send, since passing the primary key
  # name with `&` fails when it is a String (Strings have no #to_proc).
  ids = batch.map { |record| record.public_send(id_col) }
  klass.where(id_col => ids).update_all(attrs)
end
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/utils/db_poller/base'
|
4
|
+
|
5
|
+
module Deimos
|
6
|
+
module Utils
|
7
|
+
module DbPoller
|
8
|
+
# Poller that uses ID and updated_at to determine the records to publish.
|
9
|
+
class TimeBased < Base
|
10
|
+
|
11
|
+
# :nodoc:
|
12
|
+
def create_poll_info
  # A new poller starts from Time.new(0) when start_from_beginning is set,
  # otherwise from the current time.
  starting_point =
    if @config.start_from_beginning
      Time.new(0)
    else
      Time.zone.now
    end
  Deimos::PollInfo.create!(
    producer: @config.producer_class,
    last_sent: starting_point,
    last_sent_id: 0
  )
end
|
18
|
+
|
19
|
+
# @param batch [Array<ActiveRecord::Base>]
|
20
|
+
# @param status [Deimos::Utils::DbPoller::PollStatus]
|
21
|
+
def process_and_touch_info(batch, status)
  # The poll position is advanced whether or not the batch succeeded.
  process_batch_with_span(batch, status)
  touch_info(batch)
end
|
25
|
+
|
26
|
+
# Send messages for updated data.
|
27
|
+
# @return [void]
|
28
|
+
def process_updates
  logger = Deimos.config.logger
  # full_table restarts from Time.new(0) on every run; otherwise resume
  # from the last recorded send time.
  window_start = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
  # Stop delay_time seconds short of now.
  window_end = Time.zone.now - @config.delay_time
  logger.info("Polling #{@producer.topic} from #{window_start} to #{window_end}")
  status = PollStatus.new(0, 0, 0)

  # Page through the window one batch at a time, moving the lower bound up
  # to the last record of each batch.
  loop do
    logger.debug("Polling #{@producer.topic}, batch #{status.current_batch}")
    records = fetch_results(window_start, window_end).to_a
    break if records.empty?

    process_and_touch_info(records, status)
    window_start = last_updated(records.last)
  end
  logger.info("Poll #{@producer.topic} complete at #{window_end} (#{status.report})")
end
|
46
|
+
|
47
|
+
# @param time_from [ActiveSupport::TimeWithZone]
|
48
|
+
# @param time_to [ActiveSupport::TimeWithZone]
|
49
|
+
# @return [ActiveRecord::Relation]
|
50
|
+
def fetch_results(time_from, time_to)
  connection = ActiveRecord::Base.connection
  primary_key = @producer.config[:record_class].primary_key
  # Order by timestamp first, then primary key, with both names quoted.
  ordering = [@config.timestamp_column, primary_key].
    map { |column| connection.quote_column_name(column) }.
    join(', ')

  query = @producer.poll_query(time_from: time_from,
                               time_to: time_to,
                               column_name: @config.timestamp_column,
                               min_id: @info.last_sent_id)
  query.limit(BATCH_SIZE).order(ordering)
end
|
61
|
+
|
62
|
+
# @param record [ActiveRecord::Base]
|
63
|
+
# @return [ActiveSupport::TimeWithZone]
|
64
|
+
def last_updated(record)
  # Read whichever column this poller is configured to track.
  column = @config.timestamp_column
  record.public_send(column)
end
|
67
|
+
|
68
|
+
# @param batch [Array<ActiveRecord::Base>]
|
69
|
+
# @return [void]
|
70
|
+
def touch_info(batch)
  # The last record in the batch becomes the new poll position.
  newest = batch.last
  newest_id = newest.public_send(newest.class.primary_key)
  @info.attributes = { last_sent: last_updated(newest), last_sent_id: newest_id }
  @info.save!
end
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -1,22 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'deimos/poll_info'
|
4
|
-
require 'sigurd'
|
5
|
-
|
6
3
|
module Deimos
|
7
4
|
module Utils
|
8
|
-
#
|
9
|
-
|
10
|
-
# @return [Integer]
|
11
|
-
BATCH_SIZE = 1000
|
12
|
-
|
13
|
-
# Needed for Executor so it can identify the worker
|
14
|
-
# @return [Integer]
|
15
|
-
attr_reader :id
|
16
|
-
|
17
|
-
# @return [Hash]
|
18
|
-
attr_reader :config
|
19
|
-
|
5
|
+
# Overall functionality related to DB poller.
|
6
|
+
module DbPoller
|
20
7
|
# Begin the DB Poller process.
|
21
8
|
# @return [void]
|
22
9
|
def self.start!
|
@@ -25,7 +12,7 @@ module Deimos
|
|
25
12
|
end
|
26
13
|
|
27
14
|
pollers = Deimos.config.db_poller_objects.map do |poller_config|
|
28
|
-
self.new(poller_config)
|
15
|
+
self.class_for_config(poller_config.mode).new(poller_config)
|
29
16
|
end
|
30
17
|
executor = Sigurd::Executor.new(pollers,
|
31
18
|
sleep_seconds: 5,
|
@@ -34,168 +21,33 @@ module Deimos
|
|
34
21
|
signal_handler.run!
|
35
22
|
end
|
36
23
|
|
37
|
-
# @param
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
end
|
46
|
-
unless @producer < Deimos::ActiveRecordProducer
|
47
|
-
raise "Class #{@producer.class.name} is not an ActiveRecordProducer!"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
# Start the poll:
|
52
|
-
# 1) Grab the current PollInfo from the database indicating the last
|
53
|
-
# time we ran
|
54
|
-
# 2) On a loop, process all the recent updates between the last time
|
55
|
-
# we ran and now.
|
56
|
-
# @return [void]
|
57
|
-
def start
|
58
|
-
# Don't send asynchronously
|
59
|
-
if Deimos.config.producers.backend == :kafka_async
|
60
|
-
Deimos.config.producers.backend = :kafka
|
24
|
+
# @param config_name [Symbol]
|
25
|
+
# @return [Class<Deimos::Utils::DbPoller::Base>]
|
26
|
+
def self.class_for_config(config_name)
|
27
|
+
case config_name
|
28
|
+
when :state_based
|
29
|
+
Deimos::Utils::DbPoller::StateBased
|
30
|
+
else
|
31
|
+
Deimos::Utils::DbPoller::TimeBased
|
61
32
|
end
|
62
|
-
Deimos.config.logger.info('Starting...')
|
63
|
-
@signal_to_stop = false
|
64
|
-
retrieve_poll_info
|
65
|
-
loop do
|
66
|
-
if @signal_to_stop
|
67
|
-
Deimos.config.logger.info('Shutting down')
|
68
|
-
break
|
69
|
-
end
|
70
|
-
process_updates
|
71
|
-
sleep 0.1
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
# Grab the PollInfo or create if it doesn't exist.
|
76
|
-
# @return [void]
|
77
|
-
def retrieve_poll_info
|
78
|
-
ActiveRecord::Base.connection.reconnect! unless ActiveRecord::Base.connection.open_transactions.positive?
|
79
|
-
new_time = @config.start_from_beginning ? Time.new(0) : Time.zone.now
|
80
|
-
@info = Deimos::PollInfo.find_by_producer(@config.producer_class) ||
|
81
|
-
Deimos::PollInfo.create!(producer: @config.producer_class,
|
82
|
-
last_sent: new_time,
|
83
|
-
last_sent_id: 0)
|
84
|
-
end
|
85
|
-
|
86
|
-
# Stop the poll.
|
87
|
-
# @return [void]
|
88
|
-
def stop
|
89
|
-
Deimos.config.logger.info('Received signal to stop')
|
90
|
-
@signal_to_stop = true
|
91
|
-
end
|
92
|
-
|
93
|
-
# Indicate whether this current loop should process updates. Most loops
|
94
|
-
# will busy-wait (sleeping 0.1 seconds) until it's ready.
|
95
|
-
# @return [Boolean]
|
96
|
-
def should_run?
|
97
|
-
Time.zone.now - @info.last_sent - @config.delay_time >= @config.run_every
|
98
|
-
end
|
99
|
-
|
100
|
-
# @param record [ActiveRecord::Base]
|
101
|
-
# @return [ActiveSupport::TimeWithZone]
|
102
|
-
def last_updated(record)
|
103
|
-
record.public_send(@config.timestamp_column)
|
104
33
|
end
|
105
34
|
|
106
|
-
|
107
|
-
# @return [void]
|
108
|
-
def process_updates
|
109
|
-
return unless should_run?
|
110
|
-
|
111
|
-
time_from = @config.full_table ? Time.new(0) : @info.last_sent.in_time_zone
|
112
|
-
time_to = Time.zone.now - @config.delay_time
|
113
|
-
Deimos.config.logger.info("Polling #{@producer.topic} from #{time_from} to #{time_to}")
|
114
|
-
message_count = 0
|
115
|
-
batch_count = 0
|
116
|
-
error_count = 0
|
35
|
+
PollStatus = Struct.new(:batches_processed, :batches_errored, :messages_processed) do
|
117
36
|
|
118
|
-
#
|
119
|
-
|
120
|
-
|
121
|
-
Deimos.config.logger.debug("Polling #{@producer.topic}, batch #{batch_count + 1}")
|
122
|
-
batch = fetch_results(time_from, time_to).to_a
|
123
|
-
break if batch.empty?
|
124
|
-
|
125
|
-
if process_batch_with_span(batch)
|
126
|
-
batch_count += 1
|
127
|
-
else
|
128
|
-
error_count += 1
|
129
|
-
end
|
130
|
-
message_count += batch.size
|
131
|
-
time_from = last_updated(batch.last)
|
37
|
+
# @return [Integer]
|
38
|
+
def current_batch
|
39
|
+
batches_processed + 1
|
132
40
|
end
|
133
|
-
Deimos.config.logger.info("Poll #{@producer.topic} complete at #{time_to} (#{message_count} messages, #{batch_count} successful batches, #{error_count} batches errored}")
|
134
|
-
end
|
135
|
-
|
136
|
-
# @param time_from [ActiveSupport::TimeWithZone]
|
137
|
-
# @param time_to [ActiveSupport::TimeWithZone]
|
138
|
-
# @return [ActiveRecord::Relation]
|
139
|
-
def fetch_results(time_from, time_to)
|
140
|
-
id = @producer.config[:record_class].primary_key
|
141
|
-
quoted_timestamp = ActiveRecord::Base.connection.quote_column_name(@config.timestamp_column)
|
142
|
-
quoted_id = ActiveRecord::Base.connection.quote_column_name(id)
|
143
|
-
@producer.poll_query(time_from: time_from,
|
144
|
-
time_to: time_to,
|
145
|
-
column_name: @config.timestamp_column,
|
146
|
-
min_id: @info.last_sent_id).
|
147
|
-
limit(BATCH_SIZE).
|
148
|
-
order("#{quoted_timestamp}, #{quoted_id}")
|
149
|
-
end
|
150
41
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
retries = 0
|
155
|
-
begin
|
156
|
-
span = Deimos.config.tracer&.start(
|
157
|
-
'deimos-db-poller',
|
158
|
-
resource: @producer.class.name.gsub('::', '-')
|
159
|
-
)
|
160
|
-
process_batch(batch)
|
161
|
-
Deimos.config.tracer&.finish(span)
|
162
|
-
rescue Kafka::Error => e # keep trying till it fixes itself
|
163
|
-
Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}")
|
164
|
-
sleep(0.5)
|
165
|
-
retry
|
166
|
-
rescue StandardError => e
|
167
|
-
Deimos.config.logger.error("Error publishing through DB poller: #{e.message}}")
|
168
|
-
if retries < @config.retries
|
169
|
-
retries += 1
|
170
|
-
sleep(0.5)
|
171
|
-
retry
|
172
|
-
else
|
173
|
-
Deimos.config.logger.error('Retries exceeded, moving on to next batch')
|
174
|
-
Deimos.config.tracer&.set_error(span, e)
|
175
|
-
self.touch_info(batch)
|
176
|
-
return false
|
177
|
-
end
|
42
|
+
# @return [String]
|
43
|
+
def report
|
44
|
+
"#{batches_processed} batches, #{batches_errored} errored batches, #{messages_processed} processed messages"
|
178
45
|
end
|
179
|
-
true
|
180
|
-
end
|
181
|
-
|
182
|
-
# @param batch [Array<ActiveRecord::Base>]
|
183
|
-
# @return [void]
|
184
|
-
def touch_info(batch)
|
185
|
-
record = batch.last
|
186
|
-
id_method = record.class.primary_key
|
187
|
-
last_id = record.public_send(id_method)
|
188
|
-
last_updated_at = last_updated(record)
|
189
|
-
@info.attributes = { last_sent: last_updated_at, last_sent_id: last_id }
|
190
|
-
@info.save!
|
191
|
-
end
|
192
|
-
|
193
|
-
# @param batch [Array<ActiveRecord::Base>]
|
194
|
-
# @return [void]
|
195
|
-
def process_batch(batch)
|
196
|
-
@producer.send_events(batch)
|
197
|
-
self.touch_info(batch)
|
198
46
|
end
|
199
47
|
end
|
200
48
|
end
|
201
49
|
end
|
50
|
+
|
51
|
+
require 'deimos/utils/db_poller/base'
|
52
|
+
require 'deimos/utils/db_poller/time_based'
|
53
|
+
require 'deimos/utils/db_poller/state_based'
|
data/lib/deimos/version.rb
CHANGED