deimos-ruby 1.7.0.pre.beta1 → 1.8.0.pre.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +8 -2
- data/README.md +69 -15
- data/deimos-ruby.gemspec +2 -0
- data/docs/ARCHITECTURE.md +144 -0
- data/docs/CONFIGURATION.md +4 -0
- data/lib/deimos.rb +6 -6
- data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +33 -75
- data/lib/deimos/batch_consumer.rb +2 -142
- data/lib/deimos/config/configuration.rb +8 -10
- data/lib/deimos/consume/batch_consumption.rb +148 -0
- data/lib/deimos/consume/message_consumption.rb +93 -0
- data/lib/deimos/consumer.rb +79 -72
- data/lib/deimos/kafka_message.rb +1 -1
- data/lib/deimos/message.rb +6 -1
- data/lib/deimos/utils/db_poller.rb +6 -6
- data/lib/deimos/utils/db_producer.rb +6 -2
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/lag_reporter.rb +19 -26
- data/lib/deimos/version.rb +1 -1
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +3 -11
- data/spec/batch_consumer_spec.rb +23 -7
- data/spec/config/configuration_spec.rb +4 -0
- data/spec/consumer_spec.rb +6 -6
- data/spec/deimos_spec.rb +57 -49
- data/spec/handlers/my_batch_consumer.rb +6 -1
- data/spec/handlers/my_consumer.rb +6 -1
- data/spec/message_spec.rb +19 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/utils/db_poller_spec.rb +2 -2
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/lag_reporter_spec.rb +29 -22
- metadata +57 -16
- data/lib/deimos/base_consumer.rb +0 -100
- data/lib/deimos/utils/executor.rb +0 -124
- data/lib/deimos/utils/platform_schema_validation.rb +0 -0
- data/lib/deimos/utils/signal_handler.rb +0 -68
- data/spec/utils/executor_spec.rb +0 -53
- data/spec/utils/signal_handler_spec.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 82c1b89b49622cb0d47a7a7aa56e076d908785906581f9f0772f09679acd6895
+  data.tar.gz: eeadf1d1a63db1407a81ef1f993be1b8d421e8e2624b9f42c324a2dad776bd1c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a85cd1d407c11d4becb6d6d11636623b96c706325f8bbca52caf4bf0cdce079df65432cb076d01503a4d4d977c8a662c22622e27193aeafc87ffbdc8d934045
+  data.tar.gz: '08f9b75f3057071f41effaf870f8d0927dca85d647726ba833371001c470a7a062ee74ac63b5acc2f284493d3a32373ffb6c3ca96f9428ffb762c83cfd804c42'
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## UNRELEASED
 
+## 1.8.0-beta1 - 2020-07-06
+### Features :star:
+- Added `ActiveRecordConsumer` batch mode
+
+### Fixes :wrench:
+- Lag calculation can be incorrect if no messages are being consumed.
+- Fixed bug where printing messages on a MessageSizeTooLarge
+  error didn't work.
+
+### Roadmap
+- Moved SignalHandler and Executor to the `sigurd` gem.
+
+## 1.7.0-beta1 - 2020-05-12
 ### Features :star:
 - Added the DB Poller feature / process.
 
data/Gemfile.lock
CHANGED
@@ -5,6 +5,7 @@ PATH
       avro_turf (~> 0.11)
       phobos (~> 1.9)
       ruby-kafka (~> 0.7)
+      sigurd (= 0.0.1)
 
 GEM
   remote: https://rubygems.org/
@@ -65,6 +66,7 @@ GEM
     concurrent-ruby-ext (1.1.6)
       concurrent-ruby (= 1.1.6)
     crass (1.0.6)
+    database_cleaner (1.8.5)
     ddtrace (0.35.1)
       msgpack
     diff-lcs (1.3)
@@ -142,7 +144,7 @@ GEM
     pry (0.13.1)
       coderay (~> 1.1)
       method_source (~> 1.0)
-    rack (2.2.
+    rack (2.2.3)
     rack-test (1.1.0)
       rack (>= 1.0, < 3)
     rails (5.2.4.2)
@@ -204,6 +206,9 @@ GEM
       digest-crc
     ruby-progressbar (1.10.1)
     shellany (0.0.1)
+    sigurd (0.0.1)
+      concurrent-ruby (~> 1)
+      exponential-backoff
     sprockets (4.0.0)
       concurrent-ruby (~> 1.0)
       rack (> 1, < 3)
@@ -219,7 +224,7 @@ GEM
     unicode-display_width (1.7.0)
     websocket-driver (0.7.1)
       websocket-extensions (>= 0.1.0)
-    websocket-extensions (0.1.
+    websocket-extensions (0.1.5)
 
 PLATFORMS
   ruby
@@ -228,6 +233,7 @@ DEPENDENCIES
   activerecord (~> 5.2)
   activerecord-import
   avro (~> 1.9)
+  database_cleaner (~> 1.7)
   ddtrace (~> 0.11)
   deimos-ruby!
   dogstatsd-ruby (~> 4.2)
data/README.md
CHANGED
@@ -314,24 +314,14 @@ messages as an array and then process them together. This can improve
 consumer throughput, depending on the use case. Batch consumers behave like
 other consumers in regards to key and payload decoding, etc.
 
-To enable batch consumption, ensure that the `delivery` property
+To enable batch consumption, ensure that the `delivery` property of your
+consumer is set to `inline_batch`.
 
-
-
-  consumer do
-    class_name 'Consumers::MyBatchConsumer'
-    topic 'my_batched_topic'
-    group_id 'my_group_id'
-    delivery :inline_batch
-  end
-end
-```
-
-Batch consumers must inherit from the Deimos::BatchConsumer class as in
-this sample:
+Batch consumers will invoke the `consume_batch` method instead of `consume`
+as in this example:
 
 ```ruby
-class MyBatchConsumer < Deimos::
+class MyBatchConsumer < Deimos::Consumer
 
   def consume_batch(payloads, metadata)
     # payloads is an array of schema-decoded hashes.
@@ -533,12 +523,14 @@ class MyConsumer < Deimos::ActiveRecordConsumer
 
   # Optional override of the way to fetch records based on payload and
   # key. Default is to use the key to search the primary key of the table.
+  # Only used in non-batch mode.
   def fetch_record(klass, payload, key)
     super
   end
 
   # Optional override on how to set primary key for new records.
   # Default is to set the class's primary key to the message's decoded key.
+  # Only used in non-batch mode.
   def assign_key(record, payload, key)
     super
   end
@@ -546,6 +538,7 @@ class MyConsumer < Deimos::ActiveRecordConsumer
   # Optional override of the default behavior, which is to call `destroy`
   # on the record - e.g. you can replace this with "archiving" the record
   # in some way.
+  # Only used in non-batch mode.
   def destroy_record(record)
     super
   end
@@ -555,6 +548,64 @@ class MyConsumer < Deimos::ActiveRecordConsumer
   def record_attributes(payload)
     super.merge(:some_field => 'some_value')
   end
+
+  # Optional override to change the attributes used for identifying records
+  def record_key(payload)
+    super
+  end
+end
+```
+
+#### Batch Consumers
+
+Deimos also provides a batch consumption mode for `ActiveRecordConsumer` which
+processes groups of messages at once using the ActiveRecord backend.
+
+Batch ActiveRecord consumers make use of the
+[activerecord-import](https://github.com/zdennis/activerecord-import) to insert
+or update multiple records in bulk SQL statements. This reduces processing
+time at the cost of skipping ActiveRecord callbacks for individual records.
+Deleted records (tombstones) are grouped into `delete_all` calls and thus also
+skip `destroy` callbacks.
+
+Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`.
+
+**Note**: Currently, batch consumption only supports primary keys as identifiers out of the box. See
+[the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys.
+
+By default, batches will be compacted before processing, i.e. only the last
+message for each unique key in a batch will actually be processed. To change
+this behaviour, call `compacted false` inside of your consumer definition.
+
+A sample batch consumer would look as follows:
+
+```ruby
+class MyConsumer < Deimos::ActiveRecordConsumer
+  schema 'MySchema'
+  key_config field: 'my_field'
+  record_class Widget
+
+  # Controls whether the batch is compacted before consuming.
+  # If true, only the last message for each unique key in a batch will be
+  # processed.
+  # If false, messages will be grouped into "slices" of independent keys
+  # and each slice will be imported separately.
+  #
+  # compacted false
+
+
+  # Optional override of the default behavior, which is to call `delete_all`
+  # on the associated records - e.g. you can replace this with setting a deleted
+  # flag on the record.
+  def remove_records(records)
+    super
+  end
+
+  # Optional override to change the attributes of the record before they
+  # are saved.
+  def record_attributes(payload)
+    super.merge(:some_field => 'some_value')
+  end
 end
 ```
 
@@ -853,6 +904,9 @@ Deimos::Utils::InlineConsumer.get_messages_for(
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos .
 
+We have more information on the [internal architecture](docs/ARCHITECTURE.md) of Deimos
+for contributors!
+
 ### Linting
 
 Deimos uses Rubocop to lint the code. Please run Rubocop on your code
data/deimos-ruby.gemspec
CHANGED
@@ -21,10 +21,12 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency('avro_turf', '~> 0.11')
   spec.add_runtime_dependency('phobos', '~> 1.9')
   spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
+  spec.add_runtime_dependency('sigurd', '0.0.1')
 
   spec.add_development_dependency('activerecord', '~> 5.2')
   spec.add_development_dependency('activerecord-import')
   spec.add_development_dependency('avro', '~> 1.9')
+  spec.add_development_dependency('database_cleaner', '~> 1.7')
   spec.add_development_dependency('ddtrace', '~> 0.11')
   spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
   spec.add_development_dependency('guard', '~> 2')
data/docs/ARCHITECTURE.md
ADDED
@@ -0,0 +1,144 @@
+# Deimos Architecture
+
+Deimos is the third of three libraries that add functionality on top of each
+other:
+
+* [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
+  client, providing API's for producers, consumers and the client as a whole.
+* [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
+  of RubyKafka that provides threaded consumers, a simpler way to write
+  producers, and lifecycle management.
+* [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
+  using Phobos as its base which provides schema integration (e.g. Avro),
+  database integration, metrics, tracing, test helpers and other utilities.
+
+## Folder structure
+
+As of May 12, 2020, the following are the important files to understand in how
+Deimos fits together:
+* `lib/generators`: Generators to generate database migrations, e.g.
+  for the DB Poller and DB Producer features.
+* `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
+* `lib/deimos`: Main Deimos code.
+* `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
+  some global convenience methods and (for legacy purposes) the way to
+  start the DB Producer.
+* `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
+  directly to Kafka, use the DB backend, etc.
+* `lib/deimos/schema_backends`: The different plug-in schema handlers, such
+  as the various flavors of Avro (with/without schema registry etc.)
+* `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
+* `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
+* `lib/deimos/utils`: Utility classes for things not directly related to
+  producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
+* `lib/deimos/config`: Classes related to configuring Deimos.
+* `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
+  should be removed in a future update.
+
+## Features
+
+### Producers and Consumers
+
+Both producers and consumers include the `SharedConfig` module, which
+standardizes configuration like schema settings, topic, keys, etc.
+
+Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
+`BaseConsumer` for shared functionality.
+
+While produced messages go to Kafka by default, literally anything else
+can happen when your producer calls `produce`, by swapping out the producer
+_backend_. This is just a file that needs to inherit from `Deimos::Backends::Base`
+and must implement a single method, `execute`.
+
+Producers have a complex workflow while processing the payload to publish. This
+is aided by the `Deimos::Message` class (not to be confused with the
+`KafkaMessage` class, which is an ActiveRecord used by the DB Producer feature,
+below).
+
+### Schemas
+
+Schema backends are used to encode and decode payloads into different formats
+such as Avro. These are integrated with producers and consumers, as well
+as test helpers. These are a bit more involved than producer backends, and
+must define methods such as:
+* `encode` a payload or key (when encoding a key, for Avro a key schema
+  may be auto-generated)
+* `decode` a payload or key
+* `validate` that a payload is correct for encoding
+* `coerce` a payload into the given schema (e.g. turn ints into strings)
+* Get a list of `schema_fields` in the configured schema, used when interacting
+  with ActiveRecord
+* Define a `mock` backend when the given backend is used. This is used
+  during testing. Typically mock backends will validate values but not
+  actually encode/decode them.
+
+### Configuration
+
+Deimos has its own `Configurable` module that makes heavy use of `method_missing`
+to provide a very succinct but powerful configuration format (including
+default values, procs, print out as hash, reset, etc.). It also
+allows for multiple blocks to define different objects of the same type
+(like producers, consumers, pollers etc.).
+
+The configuration definition for Deimos is in `config/configuration.rb`. In
+addition, there are methods in `config/phobos_config.rb` which translate to/from
+the Phobos configuration format and support the old `phobos.yml` method
+of configuration.
+
+### Metrics and Tracing
+
+These are simpler than other plugins and must implement the expected methods
+(`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
+and `set_error` for tracing). These are used primarily in producers and consumers.
+
+### ActiveRecord Integration
+
+Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
+relatively lightweight ways to save data into a database or read it off
+the database as part of app logic. It uses things like the `coerce` method
+of the schema backends to manage the differences between the given payload
+and the configured schema for the topic.
+
+### Database Backend / Database Producer
+
+This feature (which provides better performance and transaction guarantees)
+is powered by two components:
+* The `db` _publish backend_, which saves messages to the database rather
+  than to Kafka;
+* The `DbProducer` utility, which runs as a separate process, pulls data
+  from the database and sends it to Kafka.
+
+There are a set of utility classes that power the producer, which are largely
+copied from Phobos:
+* `Executor` takes a set of "runnable" things (which implement a `start` and `stop`
+  method), puts them in a thread pool and runs them all concurrently. It
+  manages starting and stopping all threads when necessary.
+* `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
+  to stop the executor gracefully.
+
+In the case of this feature, the `DbProducer` is the runnable object - it
+can run several threads at once.
+
+On the database side, the `ActiveRecord` models that power this feature are:
+* `KafkaMessage`: The actual message, saved to the database. This message
+  is already encoded by the producer, so only has to be sent.
+* `KafkaTopicInfo`: Used for locking topics so only one producer can work
+  on it at once.
+
+A Rake task (defined in `deimos.rake`) can be used to start the producer.
+
+### Database Poller
+
+This feature (which periodically polls the database to send Kafka messages)
+primarily uses other aspects of Deimos and hence is relatively small in size.
+The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
+The `PollInfo` class is saved to the database to keep track of where each
+poller is up to.
+
+A Rake task (defined in `deimos.rake`) can be used to start the pollers.
+
+### Other Utilities
+
+The `utils` folder also contains the `LagReporter` (which sends metrics on
+lag) and the `InlineConsumer`, which can read data from a topic and directly
+pass it into a handler or save it to memory.
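As an illustration of the pluggable producer backend described in the architecture doc, a minimal custom backend might look like the sketch below. The keyword signature of `execute` mirrors how Deimos's built-in backends appear to be called, but treat it as an assumption to verify against your Deimos version; the class name and logging body are hypothetical.

```ruby
# Hypothetical producer backend: inherits from Deimos::Backends::Base and
# implements the single `execute` method the architecture doc describes.
class LogOnlyBackend < Deimos::Backends::Base
  # Assumed signature: receives the producer class and a batch of
  # Deimos::Message objects, matching the built-in backends.
  def self.execute(producer_class:, messages:)
    messages.each do |message|
      Deimos.config.logger.info(
        "Would publish to #{producer_class.topic}: #{message.encoded_payload}"
      )
    end
  end
end
```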
data/docs/CONFIGURATION.md
CHANGED
@@ -58,6 +58,10 @@ Deimos.configure do
     namespace 'my.namespace'
     key_config field: :id
 
+    # Setting to :inline_batch will invoke consume_batch instead of consume
+    # for each batch of messages.
+    delivery :batch
+
     # If config.schema.path is app/schemas, assumes there is a file in
     # app/schemas/my/namespace/MyTopicSchema.avsc
   end
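For context, a full consumer block that opts into batch delivery mirrors the example that was removed from the README in this release (class, topic, and group names are illustrative):

```ruby
Deimos.configure do
  consumer do
    class_name 'Consumers::MyBatchConsumer'
    topic 'my_batched_topic'
    group_id 'my_group_id'
    delivery :inline_batch # invokes consume_batch instead of consume
  end
end
```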
data/lib/deimos.rb
CHANGED
@@ -28,8 +28,8 @@ if defined?(ActiveRecord)
   require 'deimos/kafka_source'
   require 'deimos/kafka_topic_info'
   require 'deimos/backends/db'
-  require '
-  require '
+  require 'sigurd/signal_handler.rb'
+  require 'sigurd/executor.rb'
   require 'deimos/utils/db_producer.rb'
   require 'deimos/utils/db_poller'
 end
@@ -72,10 +72,10 @@ module Deimos
       Deimos::Utils::DbProducer.
         new(self.config.db_producer.logger || self.config.logger)
     end
-    executor =
-
-
-    signal_handler =
+    executor = Sigurd::Executor.new(producers,
+                                    sleep_seconds: 5,
+                                    logger: self.config.logger)
+    signal_handler = Sigurd::SignalHandler.new(executor)
     signal_handler.run!
   end
 end
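The swap to `sigurd` keeps the runner pattern described in the architecture notes: an executor drives "runnable" objects that respond to `start` and `stop`, and a signal handler stops them on SIGINT/SIGTERM. A minimal sketch of that contract, with a hypothetical `Heartbeat` runnable:

```ruby
require 'logger'
require 'sigurd/executor'
require 'sigurd/signal_handler'

# Hypothetical runnable: anything implementing start/stop can be driven
# by the executor, as Deimos does with DbProducer above.
class Heartbeat
  def start
    # Runs on a thread owned by the executor.
    @running = true
    puts('tick') while @running && sleep(1)
  end

  def stop
    # Invoked during shutdown (e.g. when the signal handler traps SIGINT).
    @running = false
  end
end

executor = Sigurd::Executor.new([Heartbeat.new],
                                sleep_seconds: 5,
                                logger: Logger.new($stdout))
Sigurd::SignalHandler.new(executor).run! # blocks until a signal arrives
```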
data/lib/deimos/active_record_consume/batch_consumption.rb
ADDED
@@ -0,0 +1,159 @@
+# frozen_string_literal: true
+
+require 'deimos/active_record_consume/batch_slicer'
+require 'deimos/utils/deadlock_retry'
+require 'deimos/message'
+
+module Deimos
+  module ActiveRecordConsume
+    # Methods for consuming batches of messages and saving them to the database
+    # in bulk ActiveRecord operations.
+    module BatchConsumption
+      # Handle a batch of Kafka messages. Batches are split into "slices",
+      # which are groups of independent messages that can be processed together
+      # in a single database operation.
+      # If two messages in a batch have the same key, we cannot process them
+      # in the same operation as they would interfere with each other. Thus
+      # they are split.
+      # @param payloads [Array<Hash>] Decoded payloads.
+      # @param metadata [Hash] Information about batch, including keys.
+      def consume_batch(payloads, metadata)
+        messages = payloads.
+          zip(metadata[:keys]).
+          map { |p, k| Deimos::Message.new(p, nil, key: k) }
+
+        tags = %W(topic:#{metadata[:topic]})
+
+        Deimos.instrument('ar_consumer.consume_batch', tags) do
+          # The entire batch should be treated as one transaction so that if
+          # any message fails, the whole thing is rolled back or retried
+          # if there is a deadlock.
+          Deimos::Utils::DeadlockRetry.wrap(tags) do
+            if @compacted || self.class.config[:no_keys]
+              update_database(compact_messages(messages))
+            else
+              uncompacted_update(messages)
+            end
+          end
+        end
+      end
+
+      # Get unique key for the ActiveRecord instance from the incoming key.
+      # Override this method (with super) to customize the set of attributes that
+      # uniquely identifies each record in the database.
+      # @param key [String] The encoded key.
+      # @return [Hash] The key attributes.
+      def record_key(key)
+        decoded_key = decode_key(key)
+
+        if decoded_key.nil?
+          {}
+        elsif decoded_key.is_a?(Hash)
+          @key_converter.convert(decoded_key)
+        else
+          { @klass.primary_key => decoded_key }
+        end
+      end
+
+      protected
+
+      # Perform database operations for a batch of messages without compaction.
+      # All messages are split into slices containing only unique keys, and
+      # each slice is handled as its own batch.
+      # @param messages [Array<Message>] List of messages.
+      def uncompacted_update(messages)
+        BatchSlicer.
+          slice(messages).
+          each(&method(:update_database))
+      end
+
+      # Perform database operations for a group of messages.
+      # All messages with payloads are passed to upsert_records.
+      # All tombstone messages are passed to remove_records.
+      # @param messages [Array<Message>] List of messages.
+      def update_database(messages)
+        # Find all upserted records (i.e. that have a payload) and all
+        # deleted records (no payload).
+        removed, upserted = messages.partition(&:tombstone?)
+
+        upsert_records(upserted) if upserted.any?
+        remove_records(removed) if removed.any?
+      end
+
+      # Upsert any non-deleted records.
+      # @param messages [Array<Message>] List of messages for a group of
+      # records to either be updated or inserted.
+      def upsert_records(messages)
+        key_cols = key_columns(messages)
+
+        # Create payloads with payload + key attributes
+        upserts = messages.map do |m|
+          record_attributes(m.payload, m.key)&.
+            merge(record_key(m.key))
+        end
+
+        # If overridden record_attributes indicated no record, skip
+        upserts.compact!
+
+        options = if key_cols.empty?
+                    {} # Can't upsert with no key, just do regular insert
+                  else
+                    {
+                      on_duplicate_key_update: {
+                        # conflict_target must explicitly list the columns for
+                        # Postgres and SQLite. Not required for MySQL, but this
+                        # ensures consistent behaviour.
+                        conflict_target: key_cols,
+                        columns: :all
+                      }
+                    }
+                  end
+
+        @klass.import!(upserts, options)
+      end
+
+      # Delete any records with a tombstone.
+      # @param messages [Array<Message>] List of messages for a group of
+      # deleted records.
+      def remove_records(messages)
+        clause = deleted_query(messages)
+
+        clause.delete_all
+      end
+
+      # Create an ActiveRecord relation that matches all of the passed
+      # records. Used for bulk deletion.
+      # @param records [Array<Message>] List of messages.
+      # @return ActiveRecord::Relation Matching relation.
+      def deleted_query(records)
+        keys = records.
+          map { |m| record_key(m.key)[@klass.primary_key] }.
+          reject(&:nil?)
+
+        @klass.unscoped.where(@klass.primary_key => keys)
+      end
+
+      # Get the set of attribute names that uniquely identify messages in the
+      # batch. Requires at least one record.
+      # @param records [Array<Message>] Non-empty list of messages.
+      # @return [Array<String>] List of attribute names.
+      # @raise If records is empty.
+      def key_columns(records)
+        raise 'Cannot determine key from empty batch' if records.empty?
+
+        first_key = records.first.key
+        record_key(first_key).keys
+      end
+
+      # Compact a batch of messages, taking only the last message for each
+      # unique key.
+      # @param batch [Array<Message>] Batch of messages.
+      # @return [Array<Message>] Compacted batch.
+      def compact_messages(batch)
+        return batch unless batch.first&.key.present?
+
+        batch.reverse.uniq(&:key).reverse!
+      end
+    end
+  end
+end
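As a quick illustration of the compaction step above, `batch.reverse.uniq(&:key).reverse!` keeps only the last message seen for each key while preserving relative order. The payload values here are made up:

```ruby
require 'deimos/message'

batch = [
  Deimos::Message.new({ 'v' => 1 }, nil, key: 'a'),
  Deimos::Message.new({ 'v' => 2 }, nil, key: 'b'),
  Deimos::Message.new({ 'v' => 3 }, nil, key: 'a')
]

# Reversing first makes uniq keep the *last* occurrence of each key;
# the final reverse! restores the original relative order.
compacted = batch.reverse.uniq(&:key).reverse!
compacted.map(&:key) # => ["b", "a"], keeping v=2 and v=3
```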