deimos-ruby 1.6.2 → 1.8.0.pre.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +9 -0
- data/.rubocop.yml +15 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +31 -0
- data/Gemfile.lock +43 -36
- data/README.md +141 -16
- data/Rakefile +1 -1
- data/deimos-ruby.gemspec +2 -1
- data/docs/ARCHITECTURE.md +144 -0
- data/docs/CONFIGURATION.md +27 -0
- data/lib/deimos.rb +7 -6
- data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +33 -75
- data/lib/deimos/active_record_producer.rb +23 -0
- data/lib/deimos/batch_consumer.rb +2 -140
- data/lib/deimos/config/configuration.rb +28 -10
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +79 -69
- data/lib/deimos/kafka_message.rb +1 -1
- data/lib/deimos/kafka_topic_info.rb +1 -1
- data/lib/deimos/message.rb +6 -1
- data/lib/deimos/metrics/provider.rb +0 -2
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/tracing/provider.rb +0 -2
- data/lib/deimos/utils/db_poller.rb +149 -0
- data/lib/deimos/utils/db_producer.rb +8 -3
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/lag_reporter.rb +19 -26
- data/lib/deimos/version.rb +1 -1
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +7 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +3 -11
- data/spec/active_record_producer_spec.rb +66 -88
- data/spec/batch_consumer_spec.rb +24 -7
- data/spec/config/configuration_spec.rb +4 -0
- data/spec/consumer_spec.rb +8 -8
- data/spec/deimos_spec.rb +57 -49
- data/spec/handlers/my_batch_consumer.rb +6 -1
- data/spec/handlers/my_consumer.rb +6 -1
- data/spec/message_spec.rb +19 -0
- data/spec/producer_spec.rb +3 -3
- data/spec/rake_spec.rb +1 -1
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/spec_helper.rb +61 -6
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/lag_reporter_spec.rb +29 -22
- metadata +55 -20
- data/lib/deimos/base_consumer.rb +0 -104
- data/lib/deimos/utils/executor.rb +0 -124
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +0 -68
- data/spec/utils/executor_spec.rb +0 -53
- data/spec/utils/signal_handler_spec.rb +0 -16
data/Rakefile
CHANGED
data/deimos-ruby.gemspec
CHANGED
@@ -21,11 +21,12 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency('avro_turf', '~> 0.11')
|
22
22
|
spec.add_runtime_dependency('phobos', '~> 1.9')
|
23
23
|
spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
|
24
|
+
spec.add_runtime_dependency('sigurd', '0.0.1')
|
24
25
|
|
25
26
|
spec.add_development_dependency('activerecord', '~> 5.2')
|
26
27
|
spec.add_development_dependency('activerecord-import')
|
27
28
|
spec.add_development_dependency('avro', '~> 1.9')
|
28
|
-
spec.add_development_dependency('
|
29
|
+
spec.add_development_dependency('database_cleaner', '~> 1.7')
|
29
30
|
spec.add_development_dependency('ddtrace', '~> 0.11')
|
30
31
|
spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
|
31
32
|
spec.add_development_dependency('guard', '~> 2')
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# Deimos Architecture
|
2
|
+
|
3
|
+
Deimos is the third of three libraries that add functionality on top of each
|
4
|
+
other:
|
5
|
+
|
6
|
+
* [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
|
7
|
+
client, providing APIs for producers, consumers and the client as a whole.
|
8
|
+
* [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
|
9
|
+
of RubyKafka that provides threaded consumers, a simpler way to write
|
10
|
+
producers, and lifecycle management.
|
11
|
+
* [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
|
12
|
+
using Phobos as its base which provides schema integration (e.g. Avro),
|
13
|
+
database integration, metrics, tracing, test helpers and other utilities.
|
14
|
+
|
15
|
+
## Folder structure
|
16
|
+
|
17
|
+
As of May 12, 2020, the following are the important files for understanding how
|
18
|
+
Deimos fits together:
|
19
|
+
* `lib/generators`: Generators to generate database migrations, e.g.
|
20
|
+
for the DB Poller and DB Producer features.
|
21
|
+
* `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
|
22
|
+
* `lib/deimos`: Main Deimos code.
|
23
|
+
* `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
|
24
|
+
some global convenience methods and (for legacy purposes) the way to
|
25
|
+
start the DB Producer.
|
26
|
+
* `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
|
27
|
+
directly to Kafka, use the DB backend, etc.
|
28
|
+
* `lib/deimos/schema_backends`: The different plug-in schema handlers, such
|
29
|
+
as the various flavors of Avro (with/without schema registry etc.)
|
30
|
+
* `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
|
31
|
+
* `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
|
32
|
+
* `lib/deimos/utils`: Utility classes for things not directly related to
|
33
|
+
producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
|
34
|
+
* `lib/deimos/config`: Classes related to configuring Deimos.
|
35
|
+
* `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
|
36
|
+
should be removed in a future update.
|
37
|
+
|
38
|
+
## Features
|
39
|
+
|
40
|
+
### Producers and Consumers
|
41
|
+
|
42
|
+
Both producers and consumers include the `SharedConfig` module, which
|
43
|
+
standardizes configuration like schema settings, topic, keys, etc.
|
44
|
+
|
45
|
+
Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
|
46
|
+
`BaseConsumer` for shared functionality.
|
47
|
+
|
48
|
+
While produced messages go to Kafka by default, literally anything else
|
49
|
+
can happen when your producer calls `produce`, by swapping out the producer
|
50
|
+
_backend_. This is just a file that needs to inherit from `Deimos::Backends::Base`
|
51
|
+
and must implement a single method, `execute`.
|
52
|
+
|
53
|
+
Producers have a complex workflow while processing the payload to publish. This
|
54
|
+
is aided by the `Deimos::Message` class (not to be confused with the
|
55
|
+
`KafkaMessage` class, which is an ActiveRecord used by the DB Producer feature,
|
56
|
+
below).
|
57
|
+
|
58
|
+
### Schemas
|
59
|
+
|
60
|
+
Schema backends are used to encode and decode payloads into different formats
|
61
|
+
such as Avro. These are integrated with producers and consumers, as well
|
62
|
+
as test helpers. These are a bit more involved than producer backends, and
|
63
|
+
must define methods such as:
|
64
|
+
* `encode` a payload or key (when encoding a key, for Avro a key schema
|
65
|
+
may be auto-generated)
|
66
|
+
* `decode` a payload or key
|
67
|
+
* `validate` that a payload is correct for encoding
|
68
|
+
* `coerce` a payload into the given schema (e.g. turn ints into strings)
|
69
|
+
* Get a list of `schema_fields` in the configured schema, used when interacting
|
70
|
+
with ActiveRecord
|
71
|
+
* Define a `mock` backend when the given backend is used. This is used
|
72
|
+
during testing. Typically mock backends will validate values but not
|
73
|
+
actually encode/decode them.
|
74
|
+
|
75
|
+
### Configuration
|
76
|
+
|
77
|
+
Deimos has its own `Configurable` module that makes heavy use of `method_missing`
|
78
|
+
to provide a very succinct but powerful configuration format (including
|
79
|
+
default values, procs, print out as hash, reset, etc.). It also
|
80
|
+
allows for multiple blocks to define different objects of the same type
|
81
|
+
(like producers, consumers, pollers etc.).
|
82
|
+
|
83
|
+
The configuration definition for Deimos is in `config/configuration.rb`. In
|
84
|
+
addition, there are methods in `config/phobos_config.rb` which translate to/from
|
85
|
+
the Phobos configuration format and support the old `phobos.yml` method
|
86
|
+
of configuration.
|
87
|
+
|
88
|
+
### Metrics and Tracing
|
89
|
+
|
90
|
+
These are simpler than other plugins and must implement the expected methods
|
91
|
+
(`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
|
92
|
+
and `set_error` for tracing). These are used primarily in producers and consumers.
|
93
|
+
|
94
|
+
### ActiveRecord Integration
|
95
|
+
|
96
|
+
Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
|
97
|
+
relatively lightweight ways to save data into a database or read it off
|
98
|
+
the database as part of app logic. It uses things like the `coerce` method
|
99
|
+
of the schema backends to manage the differences between the given payload
|
100
|
+
and the configured schema for the topic.
|
101
|
+
|
102
|
+
### Database Backend / Database Producer
|
103
|
+
|
104
|
+
This feature (which provides better performance and transaction guarantees)
|
105
|
+
is powered by two components:
|
106
|
+
* The `db` _publish backend_, which saves messages to the database rather
|
107
|
+
than to Kafka;
|
108
|
+
* The `DbProducer` utility, which runs as a separate process, pulls data
|
109
|
+
from the database and sends it to Kafka.
|
110
|
+
|
111
|
+
There are a set of utility classes that power the producer, which are largely
|
112
|
+
copied from Phobos:
|
113
|
+
* `Executor` takes a set of "runnable" things (which implement a `start` and `stop`
|
114
|
+
method), puts them in a thread pool and runs them all concurrently. It
|
115
|
+
manages starting and stopping all threads when necessary.
|
116
|
+
* `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
|
117
|
+
to stop the executor gracefully.
|
118
|
+
|
119
|
+
In the case of this feature, the `DbProducer` is the runnable object - it
|
120
|
+
can run several threads at once.
|
121
|
+
|
122
|
+
On the database side, the `ActiveRecord` models that power this feature are:
|
123
|
+
* `KafkaMessage`: The actual message, saved to the database. This message
|
124
|
+
is already encoded by the producer, so only has to be sent.
|
125
|
+
* `KafkaTopicInfo`: Used for locking topics so only one producer can work
|
126
|
+
on it at once.
|
127
|
+
|
128
|
+
A Rake task (defined in `deimos.rake`) can be used to start the producer.
|
129
|
+
|
130
|
+
### Database Poller
|
131
|
+
|
132
|
+
This feature (which periodically polls the database to send Kafka messages)
|
133
|
+
primarily uses other aspects of Deimos and hence is relatively small in size.
|
134
|
+
The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
|
135
|
+
The `PollInfo` class is saved to the database to keep track of where each
|
136
|
+
poller is up to.
|
137
|
+
|
138
|
+
A Rake task (defined in `deimos.rake`) can be used to start the pollers.
|
139
|
+
|
140
|
+
### Other Utilities
|
141
|
+
|
142
|
+
The `utils` folder also contains the `LagReporter` (which sends metrics on
|
143
|
+
lag) and the `InlineConsumer`, which can read data from a topic and directly
|
144
|
+
pass it into a handler or save it to memory.
|
data/docs/CONFIGURATION.md
CHANGED
@@ -58,6 +58,10 @@ Deimos.configure do
|
|
58
58
|
namespace 'my.namespace'
|
59
59
|
key_config field: :id
|
60
60
|
|
61
|
+
# Setting to :inline_batch (rather than :batch, as below) will invoke consume_batch instead of consume
|
62
|
+
# for each batch of messages.
|
63
|
+
delivery :batch
|
64
|
+
|
61
65
|
# If config.schema.path is app/schemas, assumes there is a file in
|
62
66
|
# app/schemas/my/namespace/MyTopicSchema.avsc
|
63
67
|
end
|
@@ -89,6 +93,29 @@ offset_commit_threshold|0|Number of messages that can be processed before their
|
|
89
93
|
heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
|
90
94
|
backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
|
91
95
|
|
96
|
+
## Defining Database Pollers
|
97
|
+
|
98
|
+
These are used when polling the database via `rake deimos:db_poller`. You
|
99
|
+
can create a number of pollers, one per topic.
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
Deimos.configure do
|
103
|
+
db_poller do
|
104
|
+
producer_class 'MyProducer'
|
105
|
+
run_every 2.minutes
|
106
|
+
end
|
107
|
+
end
|
108
|
+
```
|
109
|
+
|
110
|
+
Config name|Default|Description
|
111
|
+
-----------|-------|-----------
|
112
|
+
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
113
|
+
run_every|60|Amount of time in seconds to wait between runs.
|
114
|
+
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
115
|
+
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
116
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
117
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
118
|
+
|
92
119
|
## Kafka Configuration
|
93
120
|
|
94
121
|
Config name|Default|Description
|
data/lib/deimos.rb
CHANGED
@@ -28,9 +28,10 @@ if defined?(ActiveRecord)
|
|
28
28
|
require 'deimos/kafka_source'
|
29
29
|
require 'deimos/kafka_topic_info'
|
30
30
|
require 'deimos/backends/db'
|
31
|
-
require '
|
32
|
-
require '
|
31
|
+
require 'sigurd/signal_handler.rb'
|
32
|
+
require 'sigurd/executor.rb'
|
33
33
|
require 'deimos/utils/db_producer.rb'
|
34
|
+
require 'deimos/utils/db_poller'
|
34
35
|
end
|
35
36
|
|
36
37
|
require 'deimos/utils/inline_consumer'
|
@@ -71,10 +72,10 @@ module Deimos
|
|
71
72
|
Deimos::Utils::DbProducer.
|
72
73
|
new(self.config.db_producer.logger || self.config.logger)
|
73
74
|
end
|
74
|
-
executor =
|
75
|
-
|
76
|
-
|
77
|
-
signal_handler =
|
75
|
+
executor = Sigurd::Executor.new(producers,
|
76
|
+
sleep_seconds: 5,
|
77
|
+
logger: self.config.logger)
|
78
|
+
signal_handler = Sigurd::SignalHandler.new(executor)
|
78
79
|
signal_handler.run!
|
79
80
|
end
|
80
81
|
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'deimos/active_record_consume/batch_slicer'
|
4
|
+
require 'deimos/utils/deadlock_retry'
|
5
|
+
require 'deimos/message'
|
6
|
+
|
7
|
+
module Deimos
|
8
|
+
module ActiveRecordConsume
|
9
|
+
# Methods for consuming batches of messages and saving them to the database
|
10
|
+
# in bulk ActiveRecord operations.
|
11
|
+
module BatchConsumption
|
12
|
+
# Handle a batch of Kafka messages. Batches are split into "slices",
|
13
|
+
# which are groups of independent messages that can be processed together
|
14
|
+
# in a single database operation.
|
15
|
+
# If two messages in a batch have the same key, we cannot process them
|
16
|
+
# in the same operation as they would interfere with each other. Thus
|
17
|
+
# they are split into separate slices.
|
18
|
+
# @param payloads [Array<Hash>] Decoded payloads.
|
19
|
+
# @param metadata [Hash] Information about batch, including keys.
|
20
|
+
def consume_batch(payloads, metadata)
|
21
|
+
messages = payloads.
|
22
|
+
zip(metadata[:keys]).
|
23
|
+
map { |p, k| Deimos::Message.new(p, nil, key: k) }
|
24
|
+
|
25
|
+
tags = %W(topic:#{metadata[:topic]})
|
26
|
+
|
27
|
+
Deimos.instrument('ar_consumer.consume_batch', tags) do
|
28
|
+
# The entire batch should be treated as one transaction so that if
|
29
|
+
# any message fails, the whole thing is rolled back or retried
|
30
|
+
# if there is deadlock
|
31
|
+
Deimos::Utils::DeadlockRetry.wrap(tags) do
|
32
|
+
if @compacted || self.class.config[:no_keys]
|
33
|
+
update_database(compact_messages(messages))
|
34
|
+
else
|
35
|
+
uncompacted_update(messages)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Get unique key for the ActiveRecord instance from the incoming key.
|
42
|
+
# Override this method (with super) to customize the set of attributes that
|
43
|
+
# uniquely identifies each record in the database.
|
44
|
+
# @param key [String] The encoded key.
|
45
|
+
# @return [Hash] The key attributes.
|
46
|
+
def record_key(key)
|
47
|
+
decoded_key = decode_key(key)
|
48
|
+
|
49
|
+
if decoded_key.nil?
|
50
|
+
{}
|
51
|
+
elsif decoded_key.is_a?(Hash)
|
52
|
+
@key_converter.convert(decoded_key)
|
53
|
+
else
|
54
|
+
{ @klass.primary_key => decoded_key }
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
protected
|
59
|
+
|
60
|
+
# Perform database operations for a batch of messages without compaction.
|
61
|
+
# All messages are split into slices containing only unique keys, and
|
62
|
+
# each slice is handled as its own batch.
|
63
|
+
# @param messages [Array<Message>] List of messages.
|
64
|
+
def uncompacted_update(messages)
|
65
|
+
BatchSlicer.
|
66
|
+
slice(messages).
|
67
|
+
each(&method(:update_database))
|
68
|
+
end
|
69
|
+
|
70
|
+
# Perform database operations for a group of messages.
|
71
|
+
# All messages with payloads are passed to upsert_records.
|
72
|
+
# All tombstone messages are passed to remove_records.
|
73
|
+
# @param messages [Array<Message>] List of messages.
|
74
|
+
def update_database(messages)
|
75
|
+
# Find all upserted records (i.e. that have a payload) and all
|
76
|
+
# deleted records (no payload)
|
77
|
+
removed, upserted = messages.partition(&:tombstone?)
|
78
|
+
|
79
|
+
upsert_records(upserted) if upserted.any?
|
80
|
+
remove_records(removed) if removed.any?
|
81
|
+
end
|
82
|
+
|
83
|
+
# Upsert any non-deleted records
|
84
|
+
# @param messages [Array<Message>] List of messages for a group of
|
85
|
+
# records to either be updated or inserted.
|
86
|
+
def upsert_records(messages)
|
87
|
+
key_cols = key_columns(messages)
|
88
|
+
|
89
|
+
# Create payloads with payload + key attributes
|
90
|
+
upserts = messages.map do |m|
|
91
|
+
record_attributes(m.payload, m.key)&.
|
92
|
+
merge(record_key(m.key))
|
93
|
+
end
|
94
|
+
|
95
|
+
# If overridden record_attributes indicated no record, skip
|
96
|
+
upserts.compact!
|
97
|
+
|
98
|
+
options = if key_cols.empty?
|
99
|
+
{} # Can't upsert with no key, just do regular insert
|
100
|
+
else
|
101
|
+
{
|
102
|
+
on_duplicate_key_update: {
|
103
|
+
# conflict_target must explicitly list the columns for
|
104
|
+
# Postgres and SQLite. Not required for MySQL, but this
|
105
|
+
# ensures consistent behaviour.
|
106
|
+
conflict_target: key_cols,
|
107
|
+
columns: :all
|
108
|
+
}
|
109
|
+
}
|
110
|
+
end
|
111
|
+
|
112
|
+
@klass.import!(upserts, options)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Delete any records with a tombstone.
|
116
|
+
# @param messages [Array<Message>] List of messages for a group of
|
117
|
+
# deleted records.
|
118
|
+
def remove_records(messages)
|
119
|
+
clause = deleted_query(messages)
|
120
|
+
|
121
|
+
clause.delete_all
|
122
|
+
end
|
123
|
+
|
124
|
+
# Create an ActiveRecord relation that matches all of the passed
|
125
|
+
# records. Used for bulk deletion.
|
126
|
+
# @param records [Array<Message>] List of messages.
|
127
|
+
# @return [ActiveRecord::Relation] Matching relation.
|
128
|
+
def deleted_query(records)
|
129
|
+
keys = records.
|
130
|
+
map { |m| record_key(m.key)[@klass.primary_key] }.
|
131
|
+
reject(&:nil?)
|
132
|
+
|
133
|
+
@klass.unscoped.where(@klass.primary_key => keys)
|
134
|
+
end
|
135
|
+
|
136
|
+
# Get the set of attribute names that uniquely identify messages in the
|
137
|
+
# batch. Requires at least one record.
|
138
|
+
# @param records [Array<Message>] Non-empty list of messages.
|
139
|
+
# @return [Array<String>] List of attribute names.
|
140
|
+
# @raise [RuntimeError] If records is empty.
|
141
|
+
def key_columns(records)
|
142
|
+
raise 'Cannot determine key from empty batch' if records.empty?
|
143
|
+
|
144
|
+
first_key = records.first.key
|
145
|
+
record_key(first_key).keys
|
146
|
+
end
|
147
|
+
|
148
|
+
# Compact a batch of messages, taking only the last message for each
|
149
|
+
# unique key.
|
150
|
+
# @param batch [Array<Message>] Batch of messages.
|
151
|
+
# @return [Array<Message>] Compacted batch.
|
152
|
+
def compact_messages(batch)
|
153
|
+
return batch unless batch.first&.key.present?
|
154
|
+
|
155
|
+
batch.reverse.uniq(&:key).reverse!
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Deimos
|
4
|
+
module ActiveRecordConsume
|
5
|
+
# Helper class for breaking down batches into independent groups for
|
6
|
+
# processing
|
7
|
+
class BatchSlicer
|
8
|
+
# Split the batch into a series of independent slices. Each slice contains
|
9
|
+
# messages that can be processed in any order (i.e. they have distinct
|
10
|
+
# keys). Messages with the same key will be separated into different
|
11
|
+
# slices that maintain the correct order.
|
12
|
+
# E.g. Given messages A1, A2, B1, C1, C2, C3, they will be sliced as:
|
13
|
+
# [[A1, B1, C1], [A2, C2], [C3]]
|
14
|
+
def self.slice(messages)
|
15
|
+
ops = messages.group_by(&:key)
|
16
|
+
|
17
|
+
# Find maximum depth
|
18
|
+
depth = ops.values.map(&:length).max || 0
|
19
|
+
|
20
|
+
# Generate slices for each depth
|
21
|
+
depth.times.map do |i|
|
22
|
+
ops.values.map { |arr| arr.dig(i) }.compact
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Deimos
|
4
|
+
module ActiveRecordConsume
|
5
|
+
# Methods for consuming individual messages and saving them to the database
|
6
|
+
# as ActiveRecord instances.
|
7
|
+
module MessageConsumption
|
8
|
+
# Find the record specified by the given payload and key.
|
9
|
+
# Default is to use the primary key column and the value of the first
|
10
|
+
# field in the key.
|
11
|
+
# @param klass [Class < ActiveRecord::Base]
|
12
|
+
# @param _payload [Hash]
|
13
|
+
# @param key [Object]
|
14
|
+
# @return [ActiveRecord::Base]
|
15
|
+
def fetch_record(klass, _payload, key)
|
16
|
+
klass.unscoped.where(klass.primary_key => key).first
|
17
|
+
end
|
18
|
+
|
19
|
+
# Assign a key to a new record.
|
20
|
+
# @param record [ActiveRecord::Base]
|
21
|
+
# @param _payload [Hash]
|
22
|
+
# @param key [Object]
|
23
|
+
def assign_key(record, _payload, key)
|
24
|
+
record[record.class.primary_key] = key
|
25
|
+
end
|
26
|
+
|
27
|
+
# :nodoc:
|
28
|
+
def consume(payload, metadata)
|
29
|
+
key = metadata.with_indifferent_access[:key]
|
30
|
+
klass = self.class.config[:record_class]
|
31
|
+
record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
|
32
|
+
if payload.nil?
|
33
|
+
destroy_record(record)
|
34
|
+
return
|
35
|
+
end
|
36
|
+
if record.blank?
|
37
|
+
record = klass.new
|
38
|
+
assign_key(record, payload, key)
|
39
|
+
end
|
40
|
+
attrs = record_attributes(payload.with_indifferent_access, key)
|
41
|
+
# don't use attributes= - bypass Rails < 5 attr_protected
|
42
|
+
attrs.each do |k, v|
|
43
|
+
record.send("#{k}=", v)
|
44
|
+
end
|
45
|
+
record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
|
46
|
+
record.updated_at = Time.zone.now if record.respond_to?(:updated_at)
|
47
|
+
record.save!
|
48
|
+
end
|
49
|
+
|
50
|
+
# Destroy a record that received a null payload. Override if you need
|
51
|
+
# to do something other than a straight destroy (e.g. mark as archived).
|
52
|
+
# @param record [ActiveRecord::Base]
|
53
|
+
def destroy_record(record)
|
54
|
+
record&.destroy
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|