deimos-ruby 1.6.2 → 1.8.0.pre.beta2
- checksums.yaml +4 -4
- data/.circleci/config.yml +9 -0
- data/.rubocop.yml +15 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +31 -0
- data/Gemfile.lock +43 -36
- data/README.md +141 -16
- data/Rakefile +1 -1
- data/deimos-ruby.gemspec +2 -1
- data/docs/ARCHITECTURE.md +144 -0
- data/docs/CONFIGURATION.md +27 -0
- data/lib/deimos.rb +7 -6
- data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +33 -75
- data/lib/deimos/active_record_producer.rb +23 -0
- data/lib/deimos/batch_consumer.rb +2 -140
- data/lib/deimos/config/configuration.rb +28 -10
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +79 -69
- data/lib/deimos/kafka_message.rb +1 -1
- data/lib/deimos/kafka_topic_info.rb +1 -1
- data/lib/deimos/message.rb +6 -1
- data/lib/deimos/metrics/provider.rb +0 -2
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/tracing/provider.rb +0 -2
- data/lib/deimos/utils/db_poller.rb +149 -0
- data/lib/deimos/utils/db_producer.rb +8 -3
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/lag_reporter.rb +19 -26
- data/lib/deimos/version.rb +1 -1
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +7 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +3 -11
- data/spec/active_record_producer_spec.rb +66 -88
- data/spec/batch_consumer_spec.rb +24 -7
- data/spec/config/configuration_spec.rb +4 -0
- data/spec/consumer_spec.rb +8 -8
- data/spec/deimos_spec.rb +57 -49
- data/spec/handlers/my_batch_consumer.rb +6 -1
- data/spec/handlers/my_consumer.rb +6 -1
- data/spec/message_spec.rb +19 -0
- data/spec/producer_spec.rb +3 -3
- data/spec/rake_spec.rb +1 -1
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/spec_helper.rb +61 -6
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/lag_reporter_spec.rb +29 -22
- metadata +55 -20
- data/lib/deimos/base_consumer.rb +0 -104
- data/lib/deimos/utils/executor.rb +0 -124
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +0 -68
- data/spec/utils/executor_spec.rb +0 -53
- data/spec/utils/signal_handler_spec.rb +0 -16
data/Rakefile
CHANGED
data/deimos-ruby.gemspec
CHANGED
```diff
@@ -21,11 +21,12 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency('avro_turf', '~> 0.11')
   spec.add_runtime_dependency('phobos', '~> 1.9')
   spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
+  spec.add_runtime_dependency('sigurd', '0.0.1')

   spec.add_development_dependency('activerecord', '~> 5.2')
   spec.add_development_dependency('activerecord-import')
   spec.add_development_dependency('avro', '~> 1.9')
-  spec.add_development_dependency('
+  spec.add_development_dependency('database_cleaner', '~> 1.7')
   spec.add_development_dependency('ddtrace', '~> 0.11')
   spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
   spec.add_development_dependency('guard', '~> 2')
```
data/docs/ARCHITECTURE.md
ADDED

# Deimos Architecture

Deimos is the third of three libraries that add functionality on top of each
other:

* [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
  client, providing APIs for producers, consumers and the client as a whole.
* [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
  of RubyKafka that provides threaded consumers, a simpler way to write
  producers, and lifecycle management.
* [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
  using Phobos as its base which provides schema integration (e.g. Avro),
  database integration, metrics, tracing, test helpers and other utilities.

## Folder structure

As of May 12, 2020, these are the important files for understanding how
Deimos fits together:
* `lib/generators`: Generators to generate database migrations, e.g.
  for the DB Poller and DB Producer features.
* `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
* `lib/deimos`: Main Deimos code.
* `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
  some global convenience methods and (for legacy purposes) the way to
  start the DB Producer.
* `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
  directly to Kafka, use the DB backend, etc.
* `lib/deimos/schema_backends`: The different plug-in schema handlers, such
  as the various flavors of Avro (with/without schema registry, etc.).
* `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
* `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
* `lib/deimos/utils`: Utility classes for things not directly related to
  producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
* `lib/deimos/config`: Classes related to configuring Deimos.
* `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
  should be removed in a future update.

## Features
### Producers and Consumers

Both producers and consumers include the `SharedConfig` module, which
standardizes configuration like schema settings, topic, keys, etc.

Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
`BaseConsumer` for shared functionality.

While produced messages go to Kafka by default, literally anything else can
happen when your producer calls `produce`, by swapping out the producer
_backend_. This is just a class that needs to inherit from
`Deimos::Backends::Base` and implement a single method, `execute`.

Producers have a complex workflow while processing the payload to publish. This
is aided by the `Deimos::Message` class (not to be confused with the
`KafkaMessage` class, which is an ActiveRecord model used by the DB Producer
feature, below).
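For illustration, here is a minimal sketch of a custom backend. It assumes
the `Deimos::Backends::Base` contract is a class-level `execute` receiving the
producer class and the batch of `Deimos::Message` objects as keyword
arguments; the `LogBackend` name is hypothetical.

```ruby
# Hypothetical backend that logs messages instead of publishing them.
class LogBackend < Deimos::Backends::Base
  # Assumption: the Base contract is `self.execute(producer_class:, messages:)`.
  def self.execute(producer_class:, messages:)
    messages.each do |message|
      Deimos.config.logger.info(
        "#{producer_class}: #{message.encoded_payload.inspect}"
      )
    end
  end
end
```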
### Schemas

Schema backends are used to encode and decode payloads into different formats
such as Avro. These are integrated with producers and consumers, as well as
with the test helpers. They are a bit more involved than producer backends and
must define methods such as:
* `encode` a payload or key (when encoding a key, for Avro a key schema
  may be auto-generated)
* `decode` a payload or key
* `validate` that a payload is correct for encoding
* `coerce` a payload into the given schema (e.g. turn ints into strings)
* Get a list of `schema_fields` in the configured schema, used when interacting
  with ActiveRecord
* Define a `mock` backend to stand in when the given backend is used. This is
  used during testing. Typically mock backends will validate values but not
  actually encode/decode them.
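As a purely conceptual sketch (not the actual `Deimos::SchemaBackends::Base`
interface, whose exact signatures live in `lib/deimos/schema_backends`), a
hypothetical JSON "schema" backend would cover these responsibilities roughly
like this:

```ruby
require 'json'

# Hypothetical, dependency-free illustration of a schema backend's duties.
class NaiveJsonBackend
  def encode(payload)
    JSON.generate(payload) # a real backend encodes against a schema
  end

  def decode(payload)
    JSON.parse(payload)
  end

  def validate(payload)
    raise 'payload must be a Hash' unless payload.is_a?(Hash)
  end

  def coerce(payload)
    payload.transform_values(&:to_s) # e.g. turn ints into strings
  end

  def schema_fields
    [] # a real backend derives this from the configured schema
  end

  def self.mock_backend
    :mock # name of the backend to substitute during tests
  end
end
```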
### Configuration

Deimos has its own `Configurable` module that makes heavy use of `method_missing`
to provide a very succinct but powerful configuration format (including
default values, procs, printing out as a hash, resetting, etc.). It also
allows multiple blocks to define different objects of the same type
(like producers, consumers, pollers, etc.).

The configuration definition for Deimos is in `config/configuration.rb`. In
addition, there are methods in `config/phobos_config.rb` that translate to/from
the Phobos configuration format and support the old `phobos.yml` method
of configuration.
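For example, a single `configure` call can define objects of two different
types. The setting names below follow the consumer and `db_poller` examples in
CONFIGURATION.md (further down this diff); the class names are hypothetical.

```ruby
Deimos.configure do
  consumer do
    class_name 'MyConsumer'
    topic 'MyTopic'
    schema 'MyTopicSchema'
    namespace 'my.namespace'
    key_config field: :id
  end

  db_poller do
    producer_class 'MyProducer'
    run_every 2.minutes
  end
end
```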
### Metrics and Tracing

These are simpler than the other plugins and must implement the expected methods
(`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
and `set_error` for tracing). They are used primarily in producers and consumers.
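A minimal sketch of a metrics provider, assuming (as with the bundled
providers) that each method takes a metric name plus an options hash of tags;
the `LoggingMetricsProvider` class is hypothetical:

```ruby
# Hypothetical provider that writes metrics to the Deimos logger.
class LoggingMetricsProvider < Deimos::Metrics::Provider
  def increment(metric_name, options={})
    Deimos.config.logger.info("metric=#{metric_name} +1 #{options.inspect}")
  end

  def gauge(metric_name, count, options={})
    Deimos.config.logger.info("gauge=#{metric_name} value=#{count}")
  end

  def histogram(metric_name, count, options={})
    Deimos.config.logger.info("histogram=#{metric_name} value=#{count}")
  end

  def time(metric_name, options={})
    start = Time.now
    result = yield
    histogram(metric_name, Time.now - start, options)
    result
  end
end
```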
### ActiveRecord Integration

Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
relatively lightweight ways to save data into, or read it from, a database as
part of app logic. They use things like the `coerce` method of the schema
backends to manage the differences between the given payload and the
configured schema for the topic.
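As a hedged sketch of the consumer side (the `Widget` model and `full_name`
column are hypothetical; `record_class` and the `record_attributes` override
point appear in the ActiveRecord consumption modules added in this release):

```ruby
class MyConsumer < Deimos::ActiveRecordConsumer
  record_class Widget

  # Customize how payload fields map to record attributes.
  def record_attributes(payload, _key)
    super.merge(full_name: "#{payload[:first_name]} #{payload[:last_name]}")
  end
end
```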
### Database Backend / Database Producer

This feature (which provides better performance and transaction guarantees)
is powered by two components:
* The `db` _publish backend_, which saves messages to the database rather
  than to Kafka;
* The `DbProducer` utility, which runs as a separate process, pulls data
  from the database and sends it to Kafka.

There is a set of utility classes that power the producer, largely
copied from Phobos:
* `Executor` takes a set of "runnable" things (which implement a `start` and
  `stop` method), puts them in a thread pool and runs them all concurrently. It
  manages starting and stopping all threads when necessary.
* `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
  to stop the executor gracefully.

In the case of this feature, the `DbProducer` is the runnable object - it
can run several threads at once.

On the database side, the `ActiveRecord` models that power this feature are:
* `KafkaMessage`: The actual message, saved to the database. This message
  is already encoded by the producer, so it only has to be sent.
* `KafkaTopicInfo`: Used for locking topics so that only one producer can work
  on a topic at once.

A Rake task (defined in `deimos.rake`) can be used to start the producer.
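A minimal sketch of wiring this up, assuming the `:db` backend is selected
through the configuration DSL described above:

```ruby
# config/initializers/deimos.rb (hypothetical location)
Deimos.configure do
  # Save produced messages to the database instead of
  # publishing them directly to Kafka.
  producers.backend :db
end
```

The separate producer process is then started with the `deimos:db_producer`
Rake task mentioned above.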
### Database Poller

This feature (which periodically polls the database to send Kafka messages)
primarily uses other aspects of Deimos and hence is relatively small in size.
The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
The `PollInfo` class is saved to the database to keep track of how far along
each poller is.

A Rake task (defined in `deimos.rake`) can be used to start the pollers.
### Other Utilities

The `utils` folder also contains the `LagReporter` (which sends metrics on
lag) and the `InlineConsumer`, which can read data from a topic and directly
pass it into a handler or save it to memory.
data/docs/CONFIGURATION.md
CHANGED
````diff
@@ -58,6 +58,10 @@ Deimos.configure do
     namespace 'my.namespace'
     key_config field: :id

+    # Setting to :inline_batch will invoke consume_batch instead of consume
+    # for each batch of messages.
+    delivery :batch
+
     # If config.schema.path is app/schemas, assumes there is a file in
     # app/schemas/my/namespace/MyTopicSchema.avsc
   end
@@ -89,6 +93,29 @@ offset_commit_threshold|0|Number of messages that can be processed before their
 heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
 backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.

+## Defining Database Pollers
+
+These are used when polling the database via `rake deimos:db_poller`. You
+can create a number of pollers, one per topic.
+
+```ruby
+Deimos.configure do
+  db_poller do
+    producer_class 'MyProducer'
+    run_every 2.minutes
+  end
+end
+```
+
+Config name|Default|Description
+-----------|-------|-----------
+producer_class|nil|ActiveRecordProducer class to use for sending messages.
+run_every|60|Amount of time in seconds to wait between runs.
+timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
+delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
+full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
+start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
+
 ## Kafka Configuration

 Config name|Default|Description
````
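Grounded in the `delivery` comment above: with a batch delivery mode
configured (`:inline_batch` invokes `consume_batch`), the consumer class
implements `consume_batch` instead of `consume`. A hedged sketch (the class
and logging are illustrative; `metadata[:keys]` lines up with `payloads`
index-by-index, as the batch consumption code later in this diff shows):

```ruby
class MyBatchConsumer < Deimos::Consumer
  def consume_batch(payloads, metadata)
    payloads.zip(metadata[:keys]).each do |payload, key|
      Deimos.config.logger.info(
        "Received #{payload.inspect} with key #{key.inspect}"
      )
    end
  end
end
```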
data/lib/deimos.rb
CHANGED
```diff
@@ -28,9 +28,10 @@ if defined?(ActiveRecord)
   require 'deimos/kafka_source'
   require 'deimos/kafka_topic_info'
   require 'deimos/backends/db'
-  require '
-  require '
+  require 'sigurd/signal_handler.rb'
+  require 'sigurd/executor.rb'
   require 'deimos/utils/db_producer.rb'
+  require 'deimos/utils/db_poller'
 end

 require 'deimos/utils/inline_consumer'
@@ -71,10 +72,10 @@ module Deimos
         Deimos::Utils::DbProducer.
           new(self.config.db_producer.logger || self.config.logger)
       end
-      executor =
-
-
-      signal_handler =
+      executor = Sigurd::Executor.new(producers,
+                                      sleep_seconds: 5,
+                                      logger: self.config.logger)
+      signal_handler = Sigurd::SignalHandler.new(executor)
       signal_handler.run!
     end
   end
```
data/lib/deimos/active_record_consume/batch_consumption.rb
ADDED

```ruby
# frozen_string_literal: true

require 'deimos/active_record_consume/batch_slicer'
require 'deimos/utils/deadlock_retry'
require 'deimos/message'

module Deimos
  module ActiveRecordConsume
    # Methods for consuming batches of messages and saving them to the database
    # in bulk ActiveRecord operations.
    module BatchConsumption
      # Handle a batch of Kafka messages. Batches are split into "slices",
      # which are groups of independent messages that can be processed together
      # in a single database operation.
      # If two messages in a batch have the same key, we cannot process them
      # in the same operation as they would interfere with each other. Thus
      # they are split.
      # @param payloads [Array<Hash>] Decoded payloads.
      # @param metadata [Hash] Information about the batch, including keys.
      def consume_batch(payloads, metadata)
        messages = payloads.
          zip(metadata[:keys]).
          map { |p, k| Deimos::Message.new(p, nil, key: k) }

        tags = %W(topic:#{metadata[:topic]})

        Deimos.instrument('ar_consumer.consume_batch', tags) do
          # The entire batch should be treated as one transaction so that if
          # any message fails, the whole thing is rolled back or retried
          # if there is a deadlock.
          Deimos::Utils::DeadlockRetry.wrap(tags) do
            if @compacted || self.class.config[:no_keys]
              update_database(compact_messages(messages))
            else
              uncompacted_update(messages)
            end
          end
        end
      end

      # Get the unique key for the ActiveRecord instance from the incoming key.
      # Override this method (with super) to customize the set of attributes
      # that uniquely identifies each record in the database.
      # @param key [String] The encoded key.
      # @return [Hash] The key attributes.
      def record_key(key)
        decoded_key = decode_key(key)

        if decoded_key.nil?
          {}
        elsif decoded_key.is_a?(Hash)
          @key_converter.convert(decoded_key)
        else
          { @klass.primary_key => decoded_key }
        end
      end

      protected

      # Perform database operations for a batch of messages without compaction.
      # All messages are split into slices containing only unique keys, and
      # each slice is handled as its own batch.
      # @param messages [Array<Message>] List of messages.
      def uncompacted_update(messages)
        BatchSlicer.
          slice(messages).
          each(&method(:update_database))
      end

      # Perform database operations for a group of messages.
      # All messages with payloads are passed to upsert_records.
      # All tombstone messages are passed to remove_records.
      # @param messages [Array<Message>] List of messages.
      def update_database(messages)
        # Find all upserted records (i.e. that have a payload) and all
        # deleted records (no payload).
        removed, upserted = messages.partition(&:tombstone?)

        upsert_records(upserted) if upserted.any?
        remove_records(removed) if removed.any?
      end

      # Upsert any non-deleted records.
      # @param messages [Array<Message>] List of messages for a group of
      # records to either be updated or inserted.
      def upsert_records(messages)
        key_cols = key_columns(messages)

        # Create payloads with payload + key attributes
        upserts = messages.map do |m|
          record_attributes(m.payload, m.key)&.
            merge(record_key(m.key))
        end

        # If an overridden record_attributes indicated no record, skip it
        upserts.compact!

        options = if key_cols.empty?
                    {} # Can't upsert with no key, just do a regular insert
                  else
                    {
                      on_duplicate_key_update: {
                        # conflict_target must explicitly list the columns for
                        # Postgres and SQLite. Not required for MySQL, but this
                        # ensures consistent behaviour.
                        conflict_target: key_cols,
                        columns: :all
                      }
                    }
                  end

        @klass.import!(upserts, options)
      end

      # Delete any records with a tombstone.
      # @param messages [Array<Message>] List of messages for a group of
      # deleted records.
      def remove_records(messages)
        clause = deleted_query(messages)

        clause.delete_all
      end

      # Create an ActiveRecord relation that matches all of the passed
      # records. Used for bulk deletion.
      # @param records [Array<Message>] List of messages.
      # @return ActiveRecord::Relation Matching relation.
      def deleted_query(records)
        keys = records.
          map { |m| record_key(m.key)[@klass.primary_key] }.
          reject(&:nil?)

        @klass.unscoped.where(@klass.primary_key => keys)
      end

      # Get the set of attribute names that uniquely identify messages in the
      # batch. Requires at least one record.
      # @param records [Array<Message>] Non-empty list of messages.
      # @return [Array<String>] List of attribute names.
      # @raise If records is empty.
      def key_columns(records)
        raise 'Cannot determine key from empty batch' if records.empty?

        first_key = records.first.key
        record_key(first_key).keys
      end

      # Compact a batch of messages, taking only the last message for each
      # unique key.
      # @param batch [Array<Message>] Batch of messages.
      # @return [Array<Message>] Compacted batch.
      def compact_messages(batch)
        return batch unless batch.first&.key.present?

        batch.reverse.uniq(&:key).reverse!
      end
    end
  end
end
```
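To make the compaction step concrete, a hypothetical batch with a duplicate
key (constructed the same way `consume_batch` builds messages) compacts as
follows:

```ruby
messages = [
  Deimos::Message.new({ 'name' => 'first' }, nil, key: 1),
  Deimos::Message.new({ 'name' => 'second' }, nil, key: 1),
  Deimos::Message.new({ 'name' => 'other' }, nil, key: 2)
]

# compact_messages keeps only the last message per key, preserving order:
messages.reverse.uniq(&:key).reverse!
# => messages with payloads { 'name' => 'second' } (key 1)
#    and { 'name' => 'other' } (key 2)
```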
data/lib/deimos/active_record_consume/batch_slicer.rb
ADDED

```ruby
# frozen_string_literal: true

module Deimos
  module ActiveRecordConsume
    # Helper class for breaking down batches into independent groups for
    # processing.
    class BatchSlicer
      # Split the batch into a series of independent slices. Each slice
      # contains messages that can be processed in any order (i.e. they have
      # distinct keys). Messages with the same key will be separated into
      # different slices that maintain the correct order.
      # E.g. Given messages A1, A2, B1, C1, C2, C3, they will be sliced as:
      # [[A1, B1, C1], [A2, C2], [C3]]
      def self.slice(messages)
        ops = messages.group_by(&:key)

        # Find the maximum depth
        depth = ops.values.map(&:length).max || 0

        # Generate slices for each depth
        depth.times.map do |i|
          ops.values.map { |arr| arr.dig(i) }.compact
        end
      end
    end
  end
end
```
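The slicing example from the class comment can be reproduced with plain
structs, since the slicer only needs objects that respond to `key` (the
`Msg` struct here is illustrative):

```ruby
Msg = Struct.new(:name, :key)

messages = [
  Msg.new('A1', 'A'), Msg.new('A2', 'A'),
  Msg.new('B1', 'B'),
  Msg.new('C1', 'C'), Msg.new('C2', 'C'), Msg.new('C3', 'C')
]

slices = Deimos::ActiveRecordConsume::BatchSlicer.slice(messages)
slices.map { |slice| slice.map(&:name) }
# => [["A1", "B1", "C1"], ["A2", "C2"], ["C3"]]
```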
data/lib/deimos/active_record_consume/message_consumption.rb
ADDED

```ruby
# frozen_string_literal: true

module Deimos
  module ActiveRecordConsume
    # Methods for consuming individual messages and saving them to the
    # database as ActiveRecord instances.
    module MessageConsumption
      # Find the record specified by the given payload and key.
      # Default is to use the primary key column and the value of the first
      # field in the key.
      # @param klass [Class < ActiveRecord::Base]
      # @param _payload [Hash]
      # @param key [Object]
      # @return [ActiveRecord::Base]
      def fetch_record(klass, _payload, key)
        klass.unscoped.where(klass.primary_key => key).first
      end

      # Assign a key to a new record.
      # @param record [ActiveRecord::Base]
      # @param _payload [Hash]
      # @param key [Object]
      def assign_key(record, _payload, key)
        record[record.class.primary_key] = key
      end

      # :nodoc:
      def consume(payload, metadata)
        key = metadata.with_indifferent_access[:key]
        klass = self.class.config[:record_class]
        record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
        if payload.nil?
          destroy_record(record)
          return
        end
        if record.blank?
          record = klass.new
          assign_key(record, payload, key)
        end
        attrs = record_attributes(payload.with_indifferent_access, key)
        # don't use attributes= - bypass Rails < 5 attr_protected
        attrs.each do |k, v|
          record.send("#{k}=", v)
        end
        record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
        record.updated_at = Time.zone.now if record.respond_to?(:updated_at)
        record.save!
      end

      # Destroy a record that received a null payload. Override if you need
      # to do something other than a straight destroy (e.g. mark as archived).
      # @param record [ActiveRecord::Base]
      def destroy_record(record)
        record&.destroy
      end
    end
  end
end
```
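Per the `destroy_record` comment, a consumer can soft-delete on tombstones
instead of destroying the row. A hedged sketch (the `Widget` model and
`archived` column are hypothetical):

```ruby
class MyConsumer < Deimos::ActiveRecordConsumer
  record_class Widget

  # Mark the row as archived rather than deleting it when a null
  # payload (tombstone) arrives.
  def destroy_record(record)
    record&.update!(archived: true)
  end
end
```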