deimos-ruby 1.7.0.pre.beta1 → 1.8.0.pre.beta1
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +8 -2
- data/README.md +69 -15
- data/deimos-ruby.gemspec +2 -0
- data/docs/ARCHITECTURE.md +144 -0
- data/docs/CONFIGURATION.md +4 -0
- data/lib/deimos.rb +6 -6
- data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +33 -75
- data/lib/deimos/batch_consumer.rb +2 -142
- data/lib/deimos/config/configuration.rb +8 -10
- data/lib/deimos/consume/batch_consumption.rb +148 -0
- data/lib/deimos/consume/message_consumption.rb +93 -0
- data/lib/deimos/consumer.rb +79 -72
- data/lib/deimos/kafka_message.rb +1 -1
- data/lib/deimos/message.rb +6 -1
- data/lib/deimos/utils/db_poller.rb +6 -6
- data/lib/deimos/utils/db_producer.rb +6 -2
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/lag_reporter.rb +19 -26
- data/lib/deimos/version.rb +1 -1
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +3 -11
- data/spec/batch_consumer_spec.rb +23 -7
- data/spec/config/configuration_spec.rb +4 -0
- data/spec/consumer_spec.rb +6 -6
- data/spec/deimos_spec.rb +57 -49
- data/spec/handlers/my_batch_consumer.rb +6 -1
- data/spec/handlers/my_consumer.rb +6 -1
- data/spec/message_spec.rb +19 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/utils/db_poller_spec.rb +2 -2
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/lag_reporter_spec.rb +29 -22
- metadata +57 -16
- data/lib/deimos/base_consumer.rb +0 -100
- data/lib/deimos/utils/executor.rb +0 -124
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +0 -68
- data/spec/utils/executor_spec.rb +0 -53
- data/spec/utils/signal_handler_spec.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 82c1b89b49622cb0d47a7a7aa56e076d908785906581f9f0772f09679acd6895
+  data.tar.gz: eeadf1d1a63db1407a81ef1f993be1b8d421e8e2624b9f42c324a2dad776bd1c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a85cd1d407c11d4becb6d6d11636623b96c706325f8bbca52caf4bf0cdce079df65432cb076d01503a4d4d977c8a662c22622e27193aeafc87ffbdc8d934045
+  data.tar.gz: '08f9b75f3057071f41effaf870f8d0927dca85d647726ba833371001c470a7a062ee74ac63b5acc2f284493d3a32373ffb6c3ca96f9428ffb762c83cfd804c42'
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## UNRELEASED
 
+## 1.8.0-beta1 - 2020-07-06
+### Features :star:
+- Added `ActiveRecordConsumer` batch mode
+
+### Fixes :wrench:
+- Lag calculation can be incorrect if no messages are being consumed.
+- Fixed bug where printing messages on a MessageSizeTooLarge
+  error didn't work.
+
+### Roadmap
+- Moved SignalHandler and Executor to the `sigurd` gem.
+
 ## 1.7.0-beta1 - 2020-05-12
 ### Features :star:
 - Added the DB Poller feature / process.
data/Gemfile.lock
CHANGED
@@ -5,6 +5,7 @@ PATH
     avro_turf (~> 0.11)
     phobos (~> 1.9)
     ruby-kafka (~> 0.7)
+    sigurd (= 0.0.1)
 
 GEM
   remote: https://rubygems.org/
@@ -65,6 +66,7 @@ GEM
     concurrent-ruby-ext (1.1.6)
       concurrent-ruby (= 1.1.6)
     crass (1.0.6)
+    database_cleaner (1.8.5)
     ddtrace (0.35.1)
       msgpack
     diff-lcs (1.3)
@@ -142,7 +144,7 @@ GEM
     pry (0.13.1)
       coderay (~> 1.1)
       method_source (~> 1.0)
-    rack (2.2.
+    rack (2.2.3)
     rack-test (1.1.0)
       rack (>= 1.0, < 3)
     rails (5.2.4.2)
@@ -204,6 +206,9 @@ GEM
       digest-crc
     ruby-progressbar (1.10.1)
     shellany (0.0.1)
+    sigurd (0.0.1)
+      concurrent-ruby (~> 1)
+      exponential-backoff
     sprockets (4.0.0)
       concurrent-ruby (~> 1.0)
       rack (> 1, < 3)
@@ -219,7 +224,7 @@ GEM
     unicode-display_width (1.7.0)
     websocket-driver (0.7.1)
       websocket-extensions (>= 0.1.0)
-    websocket-extensions (0.1.
+    websocket-extensions (0.1.5)
 
 PLATFORMS
   ruby
@@ -228,6 +233,7 @@ DEPENDENCIES
   activerecord (~> 5.2)
   activerecord-import
   avro (~> 1.9)
+  database_cleaner (~> 1.7)
   ddtrace (~> 0.11)
   deimos-ruby!
   dogstatsd-ruby (~> 4.2)
data/README.md
CHANGED
@@ -314,24 +314,14 @@ messages as an array and then process them together. This can improve
 consumer throughput, depending on the use case. Batch consumers behave like
 other consumers in regards to key and payload decoding, etc.
 
-To enable batch consumption, ensure that the `delivery` property
+To enable batch consumption, ensure that the `delivery` property of your
+consumer is set to `inline_batch`.
 
-
-
-    consumer do
-      class_name 'Consumers::MyBatchConsumer'
-      topic 'my_batched_topic'
-      group_id 'my_group_id'
-      delivery :inline_batch
-    end
-end
-```
-
-Batch consumers must inherit from the Deimos::BatchConsumer class as in
-this sample:
+Batch consumers will invoke the `consume_batch` method instead of `consume`
+as in this example:
 
 ```ruby
-class MyBatchConsumer < Deimos::BatchConsumer
+class MyBatchConsumer < Deimos::Consumer
 
   def consume_batch(payloads, metadata)
     # payloads is an array of schema-decoded hashes.
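For reference, a consumer block that selects batch delivery might look like the following sketch, reconstructed from the example removed above (class, topic and group names are placeholders):

```ruby
# Placeholder names throughout; any consumer that implements
# `consume_batch` can be registered this way.
Deimos.configure do
  consumer do
    class_name 'Consumers::MyBatchConsumer'
    topic 'my_batched_topic'
    group_id 'my_group_id'
    delivery :inline_batch
  end
end
```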
@@ -533,12 +523,14 @@ class MyConsumer < Deimos::ActiveRecordConsumer
 
   # Optional override of the way to fetch records based on payload and
   # key. Default is to use the key to search the primary key of the table.
+  # Only used in non-batch mode.
   def fetch_record(klass, payload, key)
     super
   end
 
   # Optional override on how to set primary key for new records.
   # Default is to set the class's primary key to the message's decoded key.
+  # Only used in non-batch mode.
   def assign_key(record, payload, key)
     super
   end
@@ -546,6 +538,7 @@ class MyConsumer < Deimos::ActiveRecordConsumer
   # Optional override of the default behavior, which is to call `destroy`
   # on the record - e.g. you can replace this with "archiving" the record
   # in some way.
+  # Only used in non-batch mode.
   def destroy_record(record)
     super
   end
@@ -555,6 +548,64 @@ class MyConsumer < Deimos::ActiveRecordConsumer
   def record_attributes(payload)
     super.merge(:some_field => 'some_value')
   end
+
+  # Optional override to change the attributes used for identifying records
+  def record_key(payload)
+    super
+  end
+end
+```
+
+#### Batch Consumers
+
+Deimos also provides a batch consumption mode for `ActiveRecordConsumer` which
+processes groups of messages at once using the ActiveRecord backend.
+
+Batch ActiveRecord consumers make use of the
+[activerecord-import](https://github.com/zdennis/activerecord-import) gem to insert
+or update multiple records in bulk SQL statements. This reduces processing
+time at the cost of skipping ActiveRecord callbacks for individual records.
+Deleted records (tombstones) are grouped into `delete_all` calls and thus also
+skip `destroy` callbacks.
+
+Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`.
+
+**Note**: Currently, batch consumption only supports primary keys as identifiers out of the box. See
+[the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys.
+
+By default, batches will be compacted before processing, i.e. only the last
+message for each unique key in a batch will actually be processed. To change
+this behaviour, call `compacted false` inside of your consumer definition.
+
+A sample batch consumer would look as follows:
+
+```ruby
+class MyConsumer < Deimos::ActiveRecordConsumer
+  schema 'MySchema'
+  key_config field: 'my_field'
+  record_class Widget
+
+  # Controls whether the batch is compacted before consuming.
+  # If true, only the last message for each unique key in a batch will be
+  # processed.
+  # If false, messages will be grouped into "slices" of independent keys
+  # and each slice will be imported separately.
+  #
+  # compacted false
+
+  # Optional override of the default behavior, which is to call `delete_all`
+  # on the associated records - e.g. you can replace this with setting a deleted
+  # flag on the record.
+  def remove_records(records)
+    super
+  end
+
+  # Optional override to change the attributes of the record before they
+  # are saved.
+  def record_attributes(payload)
+    super.merge(:some_field => 'some_value')
+  end
 end
 ```
 
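Compound keys are only demonstrated in the linked spec, but the shape of the override is roughly the following hypothetical sketch (schema and field names are invented; `MySchemaCompound-key` is only known here as a spec schema added in this release):

```ruby
# Hypothetical compound-key batch consumer - see the spec above for the
# canonical example.
class MyCompoundKeyConsumer < Deimos::ActiveRecordConsumer
  schema 'MySchema'
  key_config schema: 'MySchemaCompound-key'
  record_class Widget

  # Return the hash of attributes that uniquely identifies a row.
  def record_key(key)
    decoded = decode_key(key) # e.g. { 'part_one' => 'a', 'part_two' => 1 }
    { 'part_one' => decoded['part_one'], 'part_two' => decoded['part_two'] }
  end
end
```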
@@ -853,6 +904,9 @@ Deimos::Utils::InlineConsumer.get_messages_for(
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos .
 
+We have more information on the [internal architecture](docs/ARCHITECTURE.md) of Deimos
+for contributors!
+
 ### Linting
 
 Deimos uses Rubocop to lint the code. Please run Rubocop on your code
data/deimos-ruby.gemspec
CHANGED
@@ -21,10 +21,12 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency('avro_turf', '~> 0.11')
   spec.add_runtime_dependency('phobos', '~> 1.9')
   spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
+  spec.add_runtime_dependency('sigurd', '0.0.1')
 
   spec.add_development_dependency('activerecord', '~> 5.2')
   spec.add_development_dependency('activerecord-import')
   spec.add_development_dependency('avro', '~> 1.9')
+  spec.add_development_dependency('database_cleaner', '~> 1.7')
   spec.add_development_dependency('ddtrace', '~> 0.11')
   spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
   spec.add_development_dependency('guard', '~> 2')
data/docs/ARCHITECTURE.md
ADDED
@@ -0,0 +1,144 @@
+# Deimos Architecture
+
+Deimos is the third of three libraries that add functionality on top of each
+other:
+
+* [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
+  client, providing APIs for producers, consumers and the client as a whole.
+* [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
+  of RubyKafka that provides threaded consumers, a simpler way to write
+  producers, and lifecycle management.
+* [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
+  using Phobos as its base which provides schema integration (e.g. Avro),
+  database integration, metrics, tracing, test helpers and other utilities.
+
+## Folder structure
+
+As of May 12, 2020, the following are the important files to understand in how
+Deimos fits together:
+* `lib/generators`: Generators to generate database migrations, e.g.
+  for the DB Poller and DB Producer features.
+* `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
+* `lib/deimos`: Main Deimos code.
+* `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
+  some global convenience methods and (for legacy purposes) the way to
+  start the DB Producer.
+* `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
+  directly to Kafka, use the DB backend, etc.
+* `lib/deimos/schema_backends`: The different plug-in schema handlers, such
+  as the various flavors of Avro (with/without schema registry etc.)
+* `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
+* `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
+* `lib/deimos/utils`: Utility classes for things not directly related to
+  producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
+* `lib/deimos/config`: Classes related to configuring Deimos.
+* `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
+  should be removed in a future update.
+
+## Features
+
+### Producers and Consumers
+
+Both producers and consumers include the `SharedConfig` module, which
+standardizes configuration like schema settings, topic, keys, etc.
+
+Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
+`BaseConsumer` for shared functionality.
+
+While produced messages go to Kafka by default, literally anything else
+can happen when your producer calls `produce`, by swapping out the producer
+_backend_. This is just a file that needs to inherit from `Deimos::Backends::Base`
+and must implement a single method, `execute`.
+
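As an illustration of the backend contract just described, a do-nothing backend might look like the following sketch; the class name and logging behaviour are invented, and the keyword signature of `execute` is an assumption modelled on the built-in backends:

```ruby
# Hypothetical backend that logs instead of publishing - illustrative only.
class LogOnlyBackend < Deimos::Backends::Base
  # The single required hook: receives the producer class and its batch
  # of messages. Signature assumed from the built-in Kafka backend.
  def self.execute(producer_class:, messages:)
    Deimos.config.logger.info(
      "#{producer_class}: skipping publish of #{messages.size} message(s)"
    )
  end
end
```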
+Producers have a complex workflow while processing the payload to publish. This
+is aided by the `Deimos::Message` class (not to be confused with the
+`KafkaMessage` class, which is an ActiveRecord used by the DB Producer feature,
+below).
+
+### Schemas
+
+Schema backends are used to encode and decode payloads into different formats
+such as Avro. These are integrated with producers and consumers, as well
+as test helpers. These are a bit more involved than producer backends, and
+must define methods such as:
+* `encode` a payload or key (when encoding a key, for Avro a key schema
+  may be auto-generated)
+* `decode` a payload or key
+* `validate` that a payload is correct for encoding
+* `coerce` a payload into the given schema (e.g. turn ints into strings)
+* Get a list of `schema_fields` in the configured schema, used when interacting
+  with ActiveRecord
+* Define a `mock` backend when the given backend is used. This is used
+  during testing. Typically mock backends will validate values but not
+  actually encode/decode them.
+
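A pass-through sketch using the hooks named in the list above; the exact method names and signatures in `lib/deimos/schema_backends` may differ, so treat this as a shape rather than an API reference:

```ruby
# Illustrative no-op schema backend: accepts everything, encodes nothing.
class PassThroughBackend < Deimos::SchemaBackends::Base
  # "Encode" by returning the payload untouched.
  def encode_payload(payload, schema: nil, topic: nil)
    payload
  end

  # "Decode" likewise returns the raw payload.
  def decode_payload(payload, schema: nil)
    payload
  end

  # Treat every payload as valid for encoding.
  def validate(payload, schema:)
    true
  end

  # No coercion - hand each field's value back unchanged.
  def coerce_field(field, value)
    value
  end

  # No real schema, so no fields to expose to ActiveRecord integrations.
  def schema_fields
    []
  end

  # Which backend to substitute during testing.
  def self.mock_backend
    :mock
  end
end
```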
+### Configuration
+
+Deimos has its own `Configurable` module that makes heavy use of `method_missing`
+to provide a very succinct but powerful configuration format (including
+default values, procs, print out as hash, reset, etc.). It also
+allows for multiple blocks to define different objects of the same type
+(like producers, consumers, pollers etc.).
+
+The configuration definition for Deimos is in `config/configuration.rb`. In
+addition, there are methods in `config/phobos_config.rb` which translate to/from
+the Phobos configuration format and support the old `phobos.yml` method
+of configuration.
+
+### Metrics and Tracing
+
+These are simpler than other plugins and must implement the expected methods
+(`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
+and `set_error` for tracing). These are used primarily in producers and consumers.
+
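For example, a metrics provider that simply logs could look roughly like this (the `Deimos::Metrics::Provider` base class name is an assumption; the four method names come from the paragraph above):

```ruby
# Illustrative metrics plugin that logs every metric call.
class LoggingMetrics < Deimos::Metrics::Provider
  def increment(metric_name, options={})
    Deimos.config.logger.info("increment #{metric_name}")
  end

  def gauge(metric_name, count, options={})
    Deimos.config.logger.info("gauge #{metric_name}=#{count}")
  end

  def histogram(metric_name, count, options={})
    Deimos.config.logger.info("histogram #{metric_name}=#{count}")
  end

  # Times the given block and logs the elapsed seconds.
  def time(metric_name, options={})
    start = Time.now
    result = yield
    Deimos.config.logger.info("time #{metric_name}: #{Time.now - start}s")
    result
  end
end
```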
+### ActiveRecord Integration
+
+Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
+relatively lightweight ways to save data into a database or read it off
+the database as part of app logic. They use things like the `coerce` method
+of the schema backends to manage the differences between the given payload
+and the configured schema for the topic.
+
+### Database Backend / Database Producer
+
+This feature (which provides better performance and transaction guarantees)
+is powered by two components:
+* The `db` _publish backend_, which saves messages to the database rather
+  than to Kafka;
+* The `DbProducer` utility, which runs as a separate process, pulls data
+  from the database and sends it to Kafka.
+
+There is a set of utility classes that power the producer, which are largely
+copied from Phobos:
+* `Executor` takes a set of "runnable" things (which implement a `start` and `stop`
+  method), puts them in a thread pool and runs them all concurrently. It
+  manages starting and stopping all threads when necessary.
+* `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
+  to stop the executor gracefully.
+
+In the case of this feature, the `DbProducer` is the runnable object - it
+can run several threads at once.
+
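To make the "runnable" contract concrete, here is a toy sketch; the class and its behaviour are invented, and the `Sigurd::Executor` constructor options mirror the usage shown in the `lib/deimos.rb` diff below:

```ruby
# A toy "runnable": anything exposing start/stop that the Executor can
# drive in its thread pool.
class HeartbeatRunner
  def start
    @running = true
    while @running
      Deimos.config.logger.info('heartbeat')
      sleep(1)
    end
  end

  def stop
    @running = false
  end
end

executor = Sigurd::Executor.new([HeartbeatRunner.new],
                                sleep_seconds: 5,
                                logger: Deimos.config.logger)
Sigurd::SignalHandler.new(executor).run!
```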
+On the database side, the `ActiveRecord` models that power this feature are:
+* `KafkaMessage`: The actual message, saved to the database. This message
+  is already encoded by the producer, so only has to be sent.
+* `KafkaTopicInfo`: Used for locking topics so only one producer can work
+  on a topic at once.
+
+A Rake task (defined in `deimos.rake`) can be used to start the producer.
+
+### Database Poller
+
+This feature (which periodically polls the database to send Kafka messages)
+primarily uses other aspects of Deimos and hence is relatively small in size.
+The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
+The `PollInfo` class is saved to the database to keep track of where each
+poller is up to.
+
+A Rake task (defined in `deimos.rake`) can be used to start the pollers.
+
+### Other Utilities
+
+The `utils` folder also contains the `LagReporter` (which sends metrics on
+lag) and the `InlineConsumer`, which can read data from a topic and directly
+pass it into a handler or save it to memory.
data/docs/CONFIGURATION.md
CHANGED
@@ -58,6 +58,10 @@ Deimos.configure do
     namespace 'my.namespace'
     key_config field: :id
 
+    # Setting to :inline_batch will invoke consume_batch instead of consume
+    # for each batch of messages.
+    delivery :batch
+
     # If config.schema.path is app/schemas, assumes there is a file in
     # app/schemas/my/namespace/MyTopicSchema.avsc
   end
data/lib/deimos.rb
CHANGED
@@ -28,8 +28,8 @@ if defined?(ActiveRecord)
   require 'deimos/kafka_source'
   require 'deimos/kafka_topic_info'
   require 'deimos/backends/db'
-  require '
-  require '
+  require 'sigurd/signal_handler.rb'
+  require 'sigurd/executor.rb'
   require 'deimos/utils/db_producer.rb'
   require 'deimos/utils/db_poller'
 end
@@ -72,10 +72,10 @@ module Deimos
       Deimos::Utils::DbProducer.
         new(self.config.db_producer.logger || self.config.logger)
     end
-    executor =
-
-
-    signal_handler =
+    executor = Sigurd::Executor.new(producers,
+                                    sleep_seconds: 5,
+                                    logger: self.config.logger)
+    signal_handler = Sigurd::SignalHandler.new(executor)
     signal_handler.run!
   end
 end
data/lib/deimos/active_record_consume/batch_consumption.rb
ADDED
@@ -0,0 +1,159 @@
+# frozen_string_literal: true
+
+require 'deimos/active_record_consume/batch_slicer'
+require 'deimos/utils/deadlock_retry'
+require 'deimos/message'
+
+module Deimos
+  module ActiveRecordConsume
+    # Methods for consuming batches of messages and saving them to the database
+    # in bulk ActiveRecord operations.
+    module BatchConsumption
+      # Handle a batch of Kafka messages. Batches are split into "slices",
+      # which are groups of independent messages that can be processed together
+      # in a single database operation.
+      # If two messages in a batch have the same key, we cannot process them
+      # in the same operation as they would interfere with each other. Thus
+      # they are split.
+      # @param payloads [Array<Hash>] Decoded payloads.
+      # @param metadata [Hash] Information about batch, including keys.
+      def consume_batch(payloads, metadata)
+        messages = payloads.
+          zip(metadata[:keys]).
+          map { |p, k| Deimos::Message.new(p, nil, key: k) }
+
+        tags = %W(topic:#{metadata[:topic]})
+
+        Deimos.instrument('ar_consumer.consume_batch', tags) do
+          # The entire batch should be treated as one transaction so that if
+          # any message fails, the whole thing is rolled back or retried
+          # if there is a deadlock.
+          Deimos::Utils::DeadlockRetry.wrap(tags) do
+            if @compacted || self.class.config[:no_keys]
+              update_database(compact_messages(messages))
+            else
+              uncompacted_update(messages)
+            end
+          end
+        end
+      end
+
+      # Get unique key for the ActiveRecord instance from the incoming key.
+      # Override this method (with super) to customize the set of attributes that
+      # uniquely identifies each record in the database.
+      # @param key [String] The encoded key.
+      # @return [Hash] The key attributes.
+      def record_key(key)
+        decoded_key = decode_key(key)
+
+        if decoded_key.nil?
+          {}
+        elsif decoded_key.is_a?(Hash)
+          @key_converter.convert(decoded_key)
+        else
+          { @klass.primary_key => decoded_key }
+        end
+      end
+
+      protected
+
+      # Perform database operations for a batch of messages without compaction.
+      # All messages are split into slices containing only unique keys, and
+      # each slice is handled as its own batch.
+      # @param messages [Array<Message>] List of messages.
+      def uncompacted_update(messages)
+        BatchSlicer.
+          slice(messages).
+          each(&method(:update_database))
+      end
+
+      # Perform database operations for a group of messages.
+      # All messages with payloads are passed to upsert_records.
+      # All tombstone messages are passed to remove_records.
+      # @param messages [Array<Message>] List of messages.
+      def update_database(messages)
+        # Find all upserted records (i.e. that have a payload) and all
+        # deleted records (no payload)
+        removed, upserted = messages.partition(&:tombstone?)
+
+        upsert_records(upserted) if upserted.any?
+        remove_records(removed) if removed.any?
+      end
+
+      # Upsert any non-deleted records
+      # @param messages [Array<Message>] List of messages for a group of
+      # records to either be updated or inserted.
+      def upsert_records(messages)
+        key_cols = key_columns(messages)
+
+        # Create payloads with payload + key attributes
+        upserts = messages.map do |m|
+          record_attributes(m.payload, m.key)&.
+            merge(record_key(m.key))
+        end
+
+        # If overridden record_attributes indicated no record, skip
+        upserts.compact!
+
+        options = if key_cols.empty?
+                    {} # Can't upsert with no key, just do regular insert
+                  else
+                    {
+                      on_duplicate_key_update: {
+                        # conflict_target must explicitly list the columns for
+                        # Postgres and SQLite. Not required for MySQL, but this
+                        # ensures consistent behaviour.
+                        conflict_target: key_cols,
+                        columns: :all
+                      }
+                    }
+                  end
+
+        @klass.import!(upserts, options)
+      end
+
+      # Delete any records with a tombstone.
+      # @param messages [Array<Message>] List of messages for a group of
+      # deleted records.
+      def remove_records(messages)
+        clause = deleted_query(messages)
+
+        clause.delete_all
+      end
+
+      # Create an ActiveRecord relation that matches all of the passed
+      # records. Used for bulk deletion.
+      # @param records [Array<Message>] List of messages.
+      # @return ActiveRecord::Relation Matching relation.
+      def deleted_query(records)
+        keys = records.
+          map { |m| record_key(m.key)[@klass.primary_key] }.
+          reject(&:nil?)
+
+        @klass.unscoped.where(@klass.primary_key => keys)
+      end
+
+      # Get the set of attribute names that uniquely identify messages in the
+      # batch. Requires at least one record.
+      # @param records [Array<Message>] Non-empty list of messages.
+      # @return [Array<String>] List of attribute names.
+      # @raise If records is empty.
+      def key_columns(records)
+        raise 'Cannot determine key from empty batch' if records.empty?
+
+        first_key = records.first.key
+        record_key(first_key).keys
+      end
+
+      # Compact a batch of messages, taking only the last message for each
+      # unique key.
+      # @param batch [Array<Message>] Batch of messages.
+      # @return [Array<Message>] Compacted batch.
+      def compact_messages(batch)
+        return batch unless batch.first&.key.present?
+
+        batch.reverse.uniq(&:key).reverse!
+      end
+    end
+  end
+end
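To make the compaction step concrete, here is a small illustration of what `compact_messages` does with duplicate keys; the message contents are invented for the example:

```ruby
# Three messages, two of which share key 1.
batch = [
  Deimos::Message.new({ 'name' => 'first' }, nil, key: 1),
  Deimos::Message.new({ 'name' => 'second' }, nil, key: 2),
  Deimos::Message.new({ 'name' => 'third' }, nil, key: 1)
]

# Keep only the last message per key, preserving the order of survivors -
# the same effect as batch.reverse.uniq(&:key).reverse! above.
compacted = batch.reverse.uniq(&:key).reverse
compacted.map { |m| [m.key, m.payload['name']] }
# => [[2, "second"], [1, "third"]]
```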