deimos-temp-fork 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +83 -0
- data/.gitignore +41 -0
- data/.gitmodules +0 -0
- data/.rspec +1 -0
- data/.rubocop.yml +333 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +349 -0
- data/CODE_OF_CONDUCT.md +77 -0
- data/Dockerfile +23 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +286 -0
- data/Guardfile +22 -0
- data/LICENSE.md +195 -0
- data/README.md +1099 -0
- data/Rakefile +13 -0
- data/bin/deimos +4 -0
- data/deimos-ruby.gemspec +44 -0
- data/docker-compose.yml +71 -0
- data/docs/ARCHITECTURE.md +140 -0
- data/docs/CONFIGURATION.md +236 -0
- data/docs/DATABASE_BACKEND.md +147 -0
- data/docs/INTEGRATION_TESTS.md +52 -0
- data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
- data/docs/UPGRADING.md +128 -0
- data/lib/deimos-temp-fork.rb +95 -0
- data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +67 -0
- data/lib/deimos/active_record_producer.rb +87 -0
- data/lib/deimos/backends/base.rb +32 -0
- data/lib/deimos/backends/db.rb +41 -0
- data/lib/deimos/backends/kafka.rb +33 -0
- data/lib/deimos/backends/kafka_async.rb +33 -0
- data/lib/deimos/backends/test.rb +20 -0
- data/lib/deimos/batch_consumer.rb +7 -0
- data/lib/deimos/config/configuration.rb +381 -0
- data/lib/deimos/config/phobos_config.rb +137 -0
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +104 -0
- data/lib/deimos/instrumentation.rb +76 -0
- data/lib/deimos/kafka_message.rb +60 -0
- data/lib/deimos/kafka_source.rb +128 -0
- data/lib/deimos/kafka_topic_info.rb +102 -0
- data/lib/deimos/message.rb +79 -0
- data/lib/deimos/metrics/datadog.rb +47 -0
- data/lib/deimos/metrics/mock.rb +39 -0
- data/lib/deimos/metrics/provider.rb +36 -0
- data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
- data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/producer.rb +224 -0
- data/lib/deimos/railtie.rb +8 -0
- data/lib/deimos/schema_backends/avro_base.rb +140 -0
- data/lib/deimos/schema_backends/avro_local.rb +30 -0
- data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
- data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
- data/lib/deimos/schema_backends/avro_validation.rb +21 -0
- data/lib/deimos/schema_backends/base.rb +150 -0
- data/lib/deimos/schema_backends/mock.rb +42 -0
- data/lib/deimos/shared_config.rb +63 -0
- data/lib/deimos/test_helpers.rb +360 -0
- data/lib/deimos/tracing/datadog.rb +35 -0
- data/lib/deimos/tracing/mock.rb +40 -0
- data/lib/deimos/tracing/provider.rb +29 -0
- data/lib/deimos/utils/db_poller.rb +150 -0
- data/lib/deimos/utils/db_producer.rb +243 -0
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/inline_consumer.rb +150 -0
- data/lib/deimos/utils/lag_reporter.rb +175 -0
- data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
- data/lib/deimos/version.rb +5 -0
- data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
- data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
- data/lib/generators/deimos/active_record_generator.rb +79 -0
- data/lib/generators/deimos/db_backend/templates/migration +25 -0
- data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
- data/lib/generators/deimos/db_backend_generator.rb +48 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +34 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +154 -0
- data/spec/active_record_producer_spec.rb +85 -0
- data/spec/backends/base_spec.rb +10 -0
- data/spec/backends/db_spec.rb +54 -0
- data/spec/backends/kafka_async_spec.rb +11 -0
- data/spec/backends/kafka_spec.rb +11 -0
- data/spec/batch_consumer_spec.rb +256 -0
- data/spec/config/configuration_spec.rb +248 -0
- data/spec/consumer_spec.rb +209 -0
- data/spec/deimos_spec.rb +169 -0
- data/spec/generators/active_record_generator_spec.rb +56 -0
- data/spec/handlers/my_batch_consumer.rb +10 -0
- data/spec/handlers/my_consumer.rb +10 -0
- data/spec/kafka_listener_spec.rb +55 -0
- data/spec/kafka_source_spec.rb +381 -0
- data/spec/kafka_topic_info_spec.rb +111 -0
- data/spec/message_spec.rb +19 -0
- data/spec/phobos.bad_db.yml +73 -0
- data/spec/phobos.yml +77 -0
- data/spec/producer_spec.rb +498 -0
- data/spec/rake_spec.rb +19 -0
- data/spec/schema_backends/avro_base_shared.rb +199 -0
- data/spec/schema_backends/avro_local_spec.rb +32 -0
- data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
- data/spec/schema_backends/avro_validation_spec.rb +24 -0
- data/spec/schema_backends/base_spec.rb +33 -0
- data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
- data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
- data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
- data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
- data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
- data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
- data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
- data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
- data/spec/spec_helper.rb +267 -0
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/db_producer_spec.rb +514 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/inline_consumer_spec.rb +31 -0
- data/spec/utils/lag_reporter_spec.rb +76 -0
- data/spec/utils/platform_schema_validation_spec.rb +0 -0
- data/spec/utils/schema_controller_mixin_spec.rb +84 -0
- data/support/deimos-solo.png +0 -0
- data/support/deimos-with-name-next.png +0 -0
- data/support/deimos-with-name.png +0 -0
- data/support/flipp-logo.png +0 -0
- metadata +551 -0
data/README.md
ADDED
|
@@ -0,0 +1,1099 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="support/deimos-with-name.png" title="Deimos logo"/>
|
|
3
|
+
<br/>
|
|
4
|
+
<img src="https://img.shields.io/circleci/build/github/flipp-oss/deimos.svg" alt="CircleCI"/>
|
|
5
|
+
<a href="https://badge.fury.io/rb/deimos-ruby"><img src="https://badge.fury.io/rb/deimos-ruby.svg" alt="Gem Version" height="18"></a>
|
|
6
|
+
<img src="https://img.shields.io/codeclimate/maintainability/flipp-oss/deimos.svg"/>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
A Ruby framework for marrying Kafka, a schema definition like Avro, and/or ActiveRecord and provide
|
|
10
|
+
a useful toolbox of goodies for Ruby-based Kafka development.
|
|
11
|
+
Built on Phobos and hence Ruby-Kafka.
|
|
12
|
+
|
|
13
|
+
<!--ts-->
|
|
14
|
+
* [Additional Documentation](#additional-documentation)
|
|
15
|
+
* [Installation](#installation)
|
|
16
|
+
* [Versioning](#versioning)
|
|
17
|
+
* [Configuration](#configuration)
|
|
18
|
+
* [Schemas](#schemas)
|
|
19
|
+
* [Producers](#producers)
|
|
20
|
+
* [Auto-added Fields](#auto-added-fields)
|
|
21
|
+
* [Coerced Values](#coerced-values)
|
|
22
|
+
* [Instrumentation](#instrumentation)
|
|
23
|
+
* [Kafka Message Keys](#kafka-message-keys)
|
|
24
|
+
* [Consumers](#consumers)
|
|
25
|
+
* [Rails Integration](#rails-integration)
|
|
26
|
+
* [Controller Mixin](#controller-mixin)
|
|
27
|
+
* [Database Backend](#database-backend)
|
|
28
|
+
* [Database Poller](#database-poller)
|
|
29
|
+
* [Running Consumers](#running-consumers)
|
|
30
|
+
* [Metrics](#metrics)
|
|
31
|
+
* [Testing](#testing)
|
|
32
|
+
* [Test Helpers](#test-helpers)
|
|
33
|
+
* [Integration Test Helpers](#integration-test-helpers)
|
|
34
|
+
* [Utilities](#utilities)
|
|
35
|
+
* [Contributing](#contributing)
|
|
36
|
+
<!--te-->
|
|
37
|
+
|
|
38
|
+
# Additional Documentation
|
|
39
|
+
|
|
40
|
+
Please see the following for further information not covered by this readme:
|
|
41
|
+
|
|
42
|
+
* [Architecture Design](docs/ARCHITECTURE.md)
|
|
43
|
+
* [Configuration Reference](docs/CONFIGURATION.md)
|
|
44
|
+
* [Database Backend Feature](docs/DATABASE_BACKEND.md)
|
|
45
|
+
* [Upgrading Deimos](docs/UPGRADING.md)
|
|
46
|
+
* [Contributing to Integration Tests](docs/INTEGRATION_TESTS.md)
|
|
47
|
+
|
|
48
|
+
# Installation
|
|
49
|
+
|
|
50
|
+
Add this line to your application's Gemfile:
|
|
51
|
+
```ruby
|
|
52
|
+
gem 'deimos-ruby'
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
And then execute:
|
|
56
|
+
|
|
57
|
+
$ bundle
|
|
58
|
+
|
|
59
|
+
Or install it yourself as:
|
|
60
|
+
|
|
61
|
+
$ gem install deimos-ruby
|
|
62
|
+
|
|
63
|
+
# Versioning
|
|
64
|
+
|
|
65
|
+
We use a version of semver for this gem. Any change in previous behavior
|
|
66
|
+
(something works differently or something old no longer works)
|
|
67
|
+
is denoted with a bump in the minor version (0.4 -> 0.5). Patch versions
|
|
68
|
+
are for bugfixes or new functionality which does not affect existing code. You
|
|
69
|
+
should be locking your Gemfile to the minor version:
|
|
70
|
+
|
|
71
|
+
```ruby
|
|
72
|
+
gem 'deimos-ruby', '~> 1.1'
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
# Configuration
|
|
76
|
+
|
|
77
|
+
For a full configuration reference, please see [the configuration docs](docs/CONFIGURATION.md).
|
|
78
|
+
|
|
79
|
+
# Schemas
|
|
80
|
+
|
|
81
|
+
Deimos was originally written only supporting Avro encoding via a schema registry.
|
|
82
|
+
This has since been expanded to a plugin architecture allowing messages to be
|
|
83
|
+
encoded and decoded via any schema specification you wish.
|
|
84
|
+
|
|
85
|
+
Currently we have the following possible schema backends:
|
|
86
|
+
* Avro Local (use pure Avro)
|
|
87
|
+
* Avro Schema Registry (use the Confluent Schema Registry)
|
|
88
|
+
* Avro Validation (validate using an Avro schema but leave decoded - this is useful
|
|
89
|
+
for unit testing and development)
|
|
90
|
+
* Mock (no actual encoding/decoding).
|
|
91
|
+
|
|
92
|
+
Note that to use Avro-encoding, you must include the [avro_turf](https://github.com/dasch/avro_turf) gem in your
|
|
93
|
+
Gemfile.
|
|
94
|
+
|
|
95
|
+
Other possible schemas could include [Protobuf](https://developers.google.com/protocol-buffers), [JSONSchema](https://json-schema.org/), etc. Feel free to
|
|
96
|
+
contribute!
|
|
97
|
+
|
|
98
|
+
To create a new schema backend, please see the existing examples [here](lib/deimos/schema_backends).
|
|
99
|
+
|
|
100
|
+
# Producers
|
|
101
|
+
|
|
102
|
+
Producers will look like this:
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
class MyProducer < Deimos::Producer
|
|
106
|
+
|
|
107
|
+
class << self
|
|
108
|
+
|
|
109
|
+
# Optionally override the default partition key logic, which is to use
|
|
110
|
+
# the payload key if it's provided, and nil if there is no payload key.
|
|
111
|
+
def partition_key(payload)
|
|
112
|
+
payload[:my_id]
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# You can call publish / publish_list directly, or create new methods
|
|
116
|
+
# wrapping them.
|
|
117
|
+
|
|
118
|
+
def send_some_message(an_object)
|
|
119
|
+
payload = {
|
|
120
|
+
'some-key' => an_object.foo,
|
|
121
|
+
'some-key2' => an_object.bar
|
|
122
|
+
}
|
|
123
|
+
# You can also publish an array with self.publish_list(payloads)
|
|
124
|
+
# You may specify the topic here with self.publish(payload, topic: 'my-topic')
|
|
125
|
+
self.publish(payload)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
end
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Auto-added Fields
|
|
135
|
+
|
|
136
|
+
If your schema has a field called `message_id`, and the payload you give
|
|
137
|
+
your producer doesn't have this set, Deimos will auto-generate
|
|
138
|
+
a message ID. It is highly recommended to give all schemas a message_id
|
|
139
|
+
so that you can track each sent message via logging.
|
|
140
|
+
|
|
141
|
+
You can also provide a field in your schema called `timestamp` which will be
|
|
142
|
+
auto-filled with the current timestamp if not provided.
|
|
143
|
+
|
|
144
|
+
### Coerced Values
|
|
145
|
+
|
|
146
|
+
Deimos will do some simple coercions if you pass values that don't
|
|
147
|
+
exactly match the schema.
|
|
148
|
+
|
|
149
|
+
* If the schema is :int or :long, any integer value, or a string representing
|
|
150
|
+
an integer, will be parsed to Integer.
|
|
151
|
+
* If the schema is :float or :double, any numeric value, or a string
|
|
152
|
+
representing a number, will be parsed to Float.
|
|
153
|
+
* If the schema is :string, if the value implements its own `to_s` method,
|
|
154
|
+
this will be called on it. This includes hashes, symbols, numbers, dates, etc.
|
|
155
|
+
|
|
156
|
+
### Instrumentation
|
|
157
|
+
|
|
158
|
+
Deimos will send ActiveSupport Notifications.
|
|
159
|
+
You can listen to these notifications e.g. as follows:
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
Deimos.subscribe('produce') do |event|
|
|
163
|
+
# event is an ActiveSupport::Notifications::Event
|
|
164
|
+
# you can access time, duration, and transaction_id
|
|
165
|
+
# payload contains :producer, :topic, and :payloads
|
|
166
|
+
data = event.payload
|
|
167
|
+
end
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
The following events are produced (in addition to the ones already
|
|
171
|
+
produced by Phobos and RubyKafka):
|
|
172
|
+
|
|
173
|
+
* `produce_error` - sent when an error occurs when producing a message.
|
|
174
|
+
* producer - the class that produced the message
|
|
175
|
+
* topic
|
|
176
|
+
* exception_object
|
|
177
|
+
* payloads - the unencoded payloads
|
|
178
|
+
* `encode_messages` - sent when messages are being schema-encoded.
|
|
179
|
+
* producer - the class that produced the message
|
|
180
|
+
* topic
|
|
181
|
+
* payloads - the unencoded payloads
|
|
182
|
+
* `db_producer.produce` - sent when the DB producer sends messages for the
|
|
183
|
+
DB backend. Messages that are too large will be caught with this
|
|
184
|
+
notification - they will be deleted from the table and this notification
|
|
185
|
+
will be fired with an exception object.
|
|
186
|
+
* topic
|
|
187
|
+
* exception_object
|
|
188
|
+
* messages - the batch of messages (in the form of `Deimos::KafkaMessage`s)
|
|
189
|
+
that failed - this should have only a single message in the batch.
|
|
190
|
+
|
|
191
|
+
Similarly:
|
|
192
|
+
```ruby
|
|
193
|
+
Deimos.subscribe('produce_error') do |event|
|
|
194
|
+
  data = event.payload
|
|
195
|
+
Mail.send("Got an error #{event.exception_object.message} on topic #{data[:topic]} with payloads #{data[:payloads]}")
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
Deimos.subscribe('encode_messages') do |event|
|
|
199
|
+
# ...
|
|
200
|
+
end
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Kafka Message Keys
|
|
204
|
+
|
|
205
|
+
Topics representing events rather than domain data don't need keys. However,
|
|
206
|
+
best practice for domain messages is to schema-encode message keys
|
|
207
|
+
with a separate schema.
|
|
208
|
+
|
|
209
|
+
This is enforced by requiring producers to define a `key_config` directive. If
|
|
210
|
+
any message comes in with a key, the producer will error out if `key_config` is
|
|
211
|
+
not defined.
|
|
212
|
+
|
|
213
|
+
There are three possible configurations to use:
|
|
214
|
+
|
|
215
|
+
* `key_config none: true` - this indicates that you are not using keys at all
|
|
216
|
+
for this topic. This *must* be set if your messages won't have keys - either
|
|
217
|
+
all your messages in a topic need to have a key, or they all need to have
|
|
218
|
+
no key. This is a good choice for events that aren't keyed - you can still
|
|
219
|
+
set a partition key.
|
|
220
|
+
* `key_config plain: true` - this indicates that you are not using an encoded
|
|
221
|
+
key. Use this for legacy topics - new topics should not use this setting.
|
|
222
|
+
* `key_config schema: 'MyKeySchema-key'` - this tells the producer to look for
|
|
223
|
+
an existing key schema named `MyKeySchema-key` in the schema registry and to
|
|
224
|
+
encode the key using it. Use this if you've already created a key schema
|
|
225
|
+
or the key value does not exist in the existing payload
|
|
226
|
+
(e.g. it is a compound or generated key).
|
|
227
|
+
* `key_config field: 'my_field'` - this tells the producer to look for a field
|
|
228
|
+
named `my_field` in the value schema. When a payload comes in, the producer
|
|
229
|
+
will take that value from the payload and insert it in a *dynamically generated*
|
|
230
|
+
key schema. This key schema does not need to live in your codebase. Instead,
|
|
231
|
+
it will be a subset of the value schema with only the key field in it.
|
|
232
|
+
|
|
233
|
+
If your value schema looks like this:
|
|
234
|
+
```javascript
|
|
235
|
+
{
|
|
236
|
+
"namespace": "com.my-namespace",
|
|
237
|
+
"name": "MySchema",
|
|
238
|
+
"type": "record",
|
|
239
|
+
"doc": "Test schema",
|
|
240
|
+
"fields": [
|
|
241
|
+
{
|
|
242
|
+
"name": "test_id",
|
|
243
|
+
"type": "string",
|
|
244
|
+
"doc": "test string"
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
"name": "some_int",
|
|
248
|
+
"type": "int",
|
|
249
|
+
"doc": "test int"
|
|
250
|
+
}
|
|
251
|
+
]
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
...setting `key_config field: 'test_id'` will create a key schema that looks
|
|
256
|
+
like this:
|
|
257
|
+
|
|
258
|
+
```javascript
|
|
259
|
+
{
|
|
260
|
+
"namespace": "com.my-namespace",
|
|
261
|
+
"name": "MySchema-key",
|
|
262
|
+
"type": "record",
|
|
263
|
+
"doc": "Key for com.my-namespace.MySchema",
|
|
264
|
+
"fields": [
|
|
265
|
+
{
|
|
266
|
+
"name": "test_id",
|
|
267
|
+
"type": "string",
|
|
268
|
+
"doc": "test string"
|
|
269
|
+
}
|
|
270
|
+
]
|
|
271
|
+
}
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
If you publish a payload `{ "test_id" => "123", "some_int" => 123 }`, this
|
|
275
|
+
will be turned into a key that looks like `{ "test_id" => "123"}` and schema-encoded
|
|
276
|
+
before being sent to Kafka.
|
|
277
|
+
|
|
278
|
+
If you are using `plain` or `schema` as your config, you will need to have a
|
|
279
|
+
special `payload_key` key to your payload hash. This will be extracted and
|
|
280
|
+
used as the key (for `plain`, it will be used directly, while for `schema`
|
|
281
|
+
it will be encoded first against the schema). So your payload would look like
|
|
282
|
+
`{ "test_id" => "123", "some_int" => 123, payload_key: "some_other_key"}`.
|
|
283
|
+
Remember that if you're using `schema`, the `payload_key` must be a *hash*,
|
|
284
|
+
not a plain value.
|
|
285
|
+
|
|
286
|
+
# Consumers
|
|
287
|
+
|
|
288
|
+
Here is a sample consumer:
|
|
289
|
+
|
|
290
|
+
```ruby
|
|
291
|
+
class MyConsumer < Deimos::Consumer
|
|
292
|
+
|
|
293
|
+
# Optionally overload this to consider a particular exception
|
|
294
|
+
# "fatal" only for this consumer. This is considered in addition
|
|
295
|
+
# to the global `fatal_error` configuration block.
|
|
296
|
+
def fatal_error?(exception, payload, metadata)
|
|
297
|
+
exception.is_a?(MyBadError)
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
def consume(payload, metadata)
|
|
301
|
+
# Same method as Phobos consumers.
|
|
302
|
+
    # payload is a schema-decoded hash.
|
|
303
|
+
# metadata is a hash that contains information like :key and :topic.
|
|
304
|
+
# In general, your key should be included in the payload itself. However,
|
|
305
|
+
# if you need to access it separately from the payload, you can use
|
|
306
|
+
# metadata[:key]
|
|
307
|
+
end
|
|
308
|
+
end
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
### Fatal Errors
|
|
312
|
+
|
|
313
|
+
The recommended configuration is for consumers *not* to raise errors
|
|
314
|
+
they encounter while consuming messages. Errors can come from
|
|
315
|
+
a variety of sources and it's possible that the message itself (or
|
|
316
|
+
what downstream systems are doing with it) is causing it. If you do
|
|
317
|
+
not continue on past this message, your consumer will essentially be
|
|
318
|
+
stuck forever unless you take manual action to skip the offset.
|
|
319
|
+
|
|
320
|
+
Use `config.consumers.reraise_errors = false` to swallow errors. You
|
|
321
|
+
can use instrumentation to handle errors you receive. You can also
|
|
322
|
+
specify "fatal errors" either via global configuration (`config.fatal_error`)
|
|
323
|
+
or via overriding a method on an individual consumer (`def fatal_error?`).
|
|
324
|
+
|
|
325
|
+
### Batch Consumption
|
|
326
|
+
|
|
327
|
+
Instead of consuming messages one at a time, consumers can receive a batch of
|
|
328
|
+
messages as an array and then process them together. This can improve
|
|
329
|
+
consumer throughput, depending on the use case. Batch consumers behave like
|
|
330
|
+
other consumers in regards to key and payload decoding, etc.
|
|
331
|
+
|
|
332
|
+
To enable batch consumption, ensure that the `delivery` property of your
|
|
333
|
+
consumer is set to `inline_batch`.
|
|
334
|
+
|
|
335
|
+
Batch consumers will invoke the `consume_batch` method instead of `consume`
|
|
336
|
+
as in this example:
|
|
337
|
+
|
|
338
|
+
```ruby
|
|
339
|
+
class MyBatchConsumer < Deimos::Consumer
|
|
340
|
+
|
|
341
|
+
def consume_batch(payloads, metadata)
|
|
342
|
+
# payloads is an array of schema-decoded hashes.
|
|
343
|
+
# metadata is a hash that contains information like :keys, :topic,
|
|
344
|
+
# and :first_offset.
|
|
345
|
+
# Keys are automatically decoded and available as an array with
|
|
346
|
+
# the same cardinality as the payloads. If you need to iterate
|
|
347
|
+
# over payloads and keys together, you can use something like this:
|
|
348
|
+
|
|
349
|
+
payloads.zip(metadata[:keys]) do |_payload, _key|
|
|
350
|
+
# Do something
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
# Rails Integration
|
|
357
|
+
|
|
358
|
+
### Producing
|
|
359
|
+
|
|
360
|
+
Deimos comes with an ActiveRecordProducer. This takes a single or
|
|
361
|
+
list of ActiveRecord objects or hashes and maps it to the given schema.
|
|
362
|
+
|
|
363
|
+
An example would look like this:
|
|
364
|
+
|
|
365
|
+
```ruby
|
|
366
|
+
class MyProducer < Deimos::ActiveRecordProducer
|
|
367
|
+
|
|
368
|
+
# The record class should be set on every ActiveRecordProducer.
|
|
369
|
+
# By default, if you give the producer a hash, it will re-fetch the
|
|
370
|
+
# record itself for use in the payload generation. This can be useful
|
|
371
|
+
# if you pass a list of hashes to the method e.g. as part of a
|
|
372
|
+
# mass import operation. You can turn off this behavior (e.g. if you're just
|
|
373
|
+
# using the default functionality and don't need to override it)
|
|
374
|
+
# by setting `refetch` to false. This will avoid extra database fetches.
|
|
375
|
+
record_class Widget, refetch: false
|
|
376
|
+
|
|
377
|
+
# Optionally override this if you want the message to be
|
|
378
|
+
# sent even if fields that aren't in the schema are changed.
|
|
379
|
+
def watched_attributes
|
|
380
|
+
super + ['a_non_schema_attribute']
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
# If you want to just use the default functionality you can leave this
|
|
384
|
+
# method out entirely. You only need to use it if you want to massage
|
|
385
|
+
# the payload in some way, e.g. adding fields that don't exist on the
|
|
386
|
+
# record itself.
|
|
387
|
+
def generate_payload(attributes, record)
|
|
388
|
+
super # generates payload based on the record and schema
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
# or `send_event` with just one Widget
|
|
394
|
+
MyProducer.send_events([Widget.new(foo: 1), Widget.new(foo: 2)])
|
|
395
|
+
MyProducer.send_events([{foo: 1}, {foo: 2}])
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
#### Disabling Producers
|
|
399
|
+
|
|
400
|
+
You can disable producers globally or inside a block. Globally:
|
|
401
|
+
```ruby
|
|
402
|
+
Deimos.config.producers.disabled = true
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
For the duration of a block:
|
|
406
|
+
```ruby
|
|
407
|
+
Deimos.disable_producers do
|
|
408
|
+
# code goes here
|
|
409
|
+
end
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
For specific producers only:
|
|
413
|
+
```ruby
|
|
414
|
+
Deimos.disable_producers(Producer1, Producer2) do
|
|
415
|
+
# code goes here
|
|
416
|
+
end
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
#### KafkaSource
|
|
420
|
+
|
|
421
|
+
There is a special mixin which can be added to any ActiveRecord class. This
|
|
422
|
+
will create callbacks which will automatically send messages to Kafka whenever
|
|
423
|
+
this class is saved. This even includes using the [activerecord-import](https://github.com/zdennis/activerecord-import) gem
|
|
424
|
+
to import objects (including using `on_duplicate_key_update`). However,
|
|
425
|
+
it will *not* work for `update_all`, `delete` or `delete_all`, and naturally
|
|
426
|
+
will not fire if using pure SQL or Arel.
|
|
427
|
+
|
|
428
|
+
Note that these messages are sent *during the transaction*, i.e. using
|
|
429
|
+
`after_create`, `after_update` and `after_destroy`. If there are
|
|
430
|
+
questions of consistency between the database and Kafka, it is recommended
|
|
431
|
+
to switch to using the DB backend (see next section) to avoid these issues.
|
|
432
|
+
|
|
433
|
+
When the object is destroyed, an empty payload with a payload key consisting of
|
|
434
|
+
the record's primary key is sent to the producer. If your topic's key is
|
|
435
|
+
from another field, you will need to override the `deletion_payload` method.
|
|
436
|
+
|
|
437
|
+
```ruby
|
|
438
|
+
class Widget < ActiveRecord::Base
|
|
439
|
+
include Deimos::KafkaSource
|
|
440
|
+
|
|
441
|
+
# Class method that defines an ActiveRecordProducer(s) to take the object
|
|
442
|
+
# and turn it into a payload.
|
|
443
|
+
def self.kafka_producers
|
|
444
|
+
[MyProducer]
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
def deletion_payload
|
|
448
|
+
{ payload_key: self.uuid }
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
# Optional - indicate that you want to send messages when these events
|
|
452
|
+
# occur.
|
|
453
|
+
def self.kafka_config
|
|
454
|
+
{
|
|
455
|
+
:update => true,
|
|
456
|
+
:delete => true,
|
|
457
|
+
:import => true,
|
|
458
|
+
:create => true
|
|
459
|
+
}
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
end
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
### Controller Mixin
|
|
466
|
+
|
|
467
|
+
Deimos comes with a mixin for `ActionController` which automatically encodes and decodes schema
|
|
468
|
+
payloads. There are some advantages to encoding your data in e.g. Avro rather than straight JSON,
|
|
469
|
+
particularly if your service is talking to another backend service rather than the front-end
|
|
470
|
+
browser:
|
|
471
|
+
|
|
472
|
+
* It enforces a contract between services. Solutions like [OpenAPI](https://swagger.io/specification/)
|
|
473
|
+
do this as well, but in order for the client to know the contract, usually some kind of code
|
|
474
|
+
generation has to happen. Using schemas ensures both sides know the contract without having to change code.
|
|
475
|
+
In addition, OpenAPI is now a huge and confusing format, and using simpler schema formats
|
|
476
|
+
can be beneficial.
|
|
477
|
+
* Using Avro or Protobuf ensures both forwards and backwards compatibility,
|
|
478
|
+
which reduces the need for versioning since both sides can simply ignore fields they aren't aware
|
|
479
|
+
of.
|
|
480
|
+
* Encoding and decoding using Avro or Protobuf is generally faster than straight JSON, and
|
|
481
|
+
results in smaller payloads and therefore less network traffic.
|
|
482
|
+
|
|
483
|
+
To use the mixin, add the following to your `WhateverController`:
|
|
484
|
+
|
|
485
|
+
```ruby
|
|
486
|
+
class WhateverController < ApplicationController
|
|
487
|
+
include Deimos::Utils::SchemaControllerMixin
|
|
488
|
+
|
|
489
|
+
request_namespace 'my.namespace.requests'
|
|
490
|
+
response_namespace 'my.namespace.responses'
|
|
491
|
+
|
|
492
|
+
# Add a "schemas" line for all routes that should encode/decode schemas.
|
|
493
|
+
# Default is to match the schema name to the route name.
|
|
494
|
+
schemas :index
|
|
495
|
+
# will look for: my.namespace.requests.Index.avsc
|
|
496
|
+
# my.namespace.responses.Index.avsc
|
|
497
|
+
|
|
498
|
+
# Can use mapping to change the schema but keep the namespaces,
|
|
499
|
+
# i.e. use the same schema name across the two namespaces
|
|
500
|
+
schemas create: 'CreateTopic'
|
|
501
|
+
# will look for: my.namespace.requests.CreateTopic.avsc
|
|
502
|
+
# my.namespace.responses.CreateTopic.avsc
|
|
503
|
+
|
|
504
|
+
# If all routes use the default, you can add them all at once
|
|
505
|
+
schemas :index, :show, :update
|
|
506
|
+
|
|
507
|
+
# Different schemas can be specified as well
|
|
508
|
+
schemas :index, :show, request: 'IndexRequest', response: 'IndexResponse'
|
|
509
|
+
|
|
510
|
+
# To access the encoded data, use the `payload` helper method, and to render it back,
|
|
511
|
+
# use the `render_schema` method.
|
|
512
|
+
|
|
513
|
+
def index
|
|
514
|
+
response = { 'response_id' => payload['request_id'] + 'hi mom' }
|
|
515
|
+
render_schema(response)
|
|
516
|
+
end
|
|
517
|
+
end
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
To make use of this feature, your requests and responses need to have the correct content type.
|
|
521
|
+
For Avro content, this is the `avro/binary` content type.
|
|
522
|
+
|
|
523
|
+
# Database Backend
|
|
524
|
+
|
|
525
|
+
Deimos provides a way to allow Kafka messages to be created inside a
|
|
526
|
+
database transaction, and send them asynchronously. This ensures that your
|
|
527
|
+
database transactions and Kafka messages related to those transactions
|
|
528
|
+
are always in sync. Essentially, it separates the message logic so that a
|
|
529
|
+
message is first validated, encoded, and saved in the database, and then sent
|
|
530
|
+
on a separate thread. This means if you have to roll back your transaction,
|
|
531
|
+
it also rolls back your Kafka messages.
|
|
532
|
+
|
|
533
|
+
This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html).
|
|
534
|
+
|
|
535
|
+
To enable this, first generate the migration to create the relevant tables:
|
|
536
|
+
|
|
537
|
+
rails g deimos:db_backend
|
|
538
|
+
|
|
539
|
+
You can now set the following configuration:
|
|
540
|
+
|
|
541
|
+
config.producers.backend = :db
|
|
542
|
+
|
|
543
|
+
This will save all your Kafka messages to the `kafka_messages` table instead
|
|
544
|
+
of immediately sending to Kafka. Now, you just need to call
|
|
545
|
+
|
|
546
|
+
Deimos.start_db_backend!
|
|
547
|
+
|
|
548
|
+
You can do this inside a thread or fork block.
|
|
549
|
+
If using Rails, you can use a Rake task to do this:
|
|
550
|
+
|
|
551
|
+
rails deimos:db_producer
|
|
552
|
+
|
|
553
|
+
This creates one or more threads dedicated to scanning and publishing these
|
|
554
|
+
messages by using the `kafka_topics` table in a manner similar to
|
|
555
|
+
[Delayed Job](https://github.com/collectiveidea/delayed_job).
|
|
556
|
+
You can pass in a number of threads to the method:
|
|
557
|
+
|
|
558
|
+
Deimos.start_db_backend!(thread_count: 2) # OR
|
|
559
|
+
THREAD_COUNT=5 rails deimos:db_producer
|
|
560
|
+
|
|
561
|
+
If you want to force a message to send immediately, just call the `publish_list`
|
|
562
|
+
method with `force_send: true`. You can also pass `force_send` into any of the
|
|
563
|
+
other methods that publish events, like `send_event` in `ActiveRecordProducer`.
|
|
564
|
+
|
|
565
|
+
A couple of gotchas when using this feature:
|
|
566
|
+
* This may result in high throughput depending on your scale. If you're
|
|
567
|
+
using Rails < 5.1, you should add a migration to change the `id` column
|
|
568
|
+
to `BIGINT`. Rails >= 5.1 sets it to BIGINT by default.
|
|
569
|
+
* This table is high throughput but should generally be empty. Make sure
|
|
570
|
+
you optimize/vacuum this table regularly to reclaim the disk space.
|
|
571
|
+
* Currently, threads allow you to scale the *number* of topics but not
|
|
572
|
+
a single large topic with lots of messages. There is an [issue](https://github.com/flipp-oss/deimos/issues/23)
|
|
573
|
+
opened that would help with this case.
|
|
574
|
+
|
|
575
|
+
For more information on how the database backend works and why it was
|
|
576
|
+
implemented, please see [Database Backends](docs/DATABASE_BACKEND.md).
|
|
577
|
+
|
|
578
|
+
### Consuming
|
|
579
|
+
|
|
580
|
+
Deimos provides an ActiveRecordConsumer which will take a payload
|
|
581
|
+
and automatically save it to a provided model. It will take the intersection
|
|
582
|
+
of the payload fields and the model attributes, and either create a new record
|
|
583
|
+
or update an existing record. It will use the message key to find the record
|
|
584
|
+
in the database.
|
|
585
|
+
|
|
586
|
+
To delete a record, simply produce a message with the record's ID as the message
|
|
587
|
+
key and a null payload.
|
|
588
|
+
|
|
589
|
+
Note that to retrieve the key, you must specify the correct [key encoding](#kafka-message-keys)
|
|
590
|
+
configuration.
|
|
591
|
+
|
|
592
|
+
A sample consumer would look as follows:
|
|
593
|
+
|
|
594
|
+
```ruby
|
|
595
|
+
class MyConsumer < Deimos::ActiveRecordConsumer
|
|
596
|
+
record_class Widget
|
|
597
|
+
|
|
598
|
+
# Optional override of the way to fetch records based on payload and
|
|
599
|
+
# key. Default is to use the key to search the primary key of the table.
|
|
600
|
+
# Only used in non-batch mode.
|
|
601
|
+
def fetch_record(klass, payload, key)
|
|
602
|
+
super
|
|
603
|
+
end
|
|
604
|
+
|
|
605
|
+
# Optional override on how to set primary key for new records.
|
|
606
|
+
# Default is to set the class's primary key to the message's decoded key.
|
|
607
|
+
# Only used in non-batch mode.
|
|
608
|
+
def assign_key(record, payload, key)
|
|
609
|
+
super
|
|
610
|
+
end
|
|
611
|
+
|
|
612
|
+
# Optional override of the default behavior, which is to call `destroy`
|
|
613
|
+
# on the record - e.g. you can replace this with "archiving" the record
|
|
614
|
+
# in some way.
|
|
615
|
+
# Only used in non-batch mode.
|
|
616
|
+
def destroy_record(record)
|
|
617
|
+
super
|
|
618
|
+
end
|
|
619
|
+
|
|
620
|
+
# Optional override to change the attributes of the record before they
|
|
621
|
+
# are saved.
|
|
622
|
+
def record_attributes(payload, key)
|
|
623
|
+
super.merge(:some_field => 'some_value')
|
|
624
|
+
end
|
|
625
|
+
|
|
626
|
+
# Optional override to change the attributes used for identifying records
|
|
627
|
+
def record_key(payload)
|
|
628
|
+
super
|
|
629
|
+
end
|
|
630
|
+
|
|
631
|
+
# Optional override, returns true by default.
|
|
632
|
+
# When this method returns true, a record corresponding to the message
|
|
633
|
+
# is created/updated.
|
|
634
|
+
# When this method returns false, message processing is skipped and a
|
|
635
|
+
# corresponding record will NOT be created/updated.
|
|
636
|
+
def process_message?(payload)
|
|
637
|
+
super
|
|
638
|
+
end
|
|
639
|
+
end
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
#### Generating Tables and Models
|
|
643
|
+
|
|
644
|
+
Deimos provides a generator that takes an existing schema and generates a
|
|
645
|
+
database table based on its fields. By default, any complex sub-types (such as
|
|
646
|
+
records or arrays) are turned into JSON (if supported) or string columns.
|
|
647
|
+
|
|
648
|
+
Before running this migration, you must first copy the schema into your repo
|
|
649
|
+
in the correct path (in the example above, you would need to have a file
|
|
650
|
+
`{SCHEMA_ROOT}/com/my-namespace/MySchema.avsc`).
|
|
651
|
+
|
|
652
|
+
To generate a model and migration, run the following:
|
|
653
|
+
|
|
654
|
+
rails g deimos:active_record TABLE_NAME FULL_SCHEMA_NAME
|
|
655
|
+
|
|
656
|
+
Example:
|
|
657
|
+
|
|
658
|
+
rails g deimos:active_record my_table com.my-namespace.MySchema
|
|
659
|
+
|
|
660
|
+
...would generate:
|
|
661
|
+
|
|
662
|
+
db/migrate/1234_create_my_table.rb
|
|
663
|
+
app/models/my_table.rb
|
|
664
|
+
|
|
665
|
+
#### Batch Consumers
|
|
666
|
+
|
|
667
|
+
Deimos also provides a batch consumption mode for `ActiveRecordConsumer` which
|
|
668
|
+
processes groups of messages at once using the ActiveRecord backend.
|
|
669
|
+
|
|
670
|
+
Batch ActiveRecord consumers make use of the
|
|
671
|
+
[activerecord-import](https://github.com/zdennis/activerecord-import) gem to insert
|
|
672
|
+
or update multiple records in bulk SQL statements. This reduces processing
|
|
673
|
+
time at the cost of skipping ActiveRecord callbacks for individual records.
|
|
674
|
+
Deleted records (tombstones) are grouped into `delete_all` calls and thus also
|
|
675
|
+
skip `destroy` callbacks.
|
|
676
|
+
|
|
677
|
+
Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`.
|
|
678
|
+
|
|
679
|
+
**Note**: Currently, batch consumption only supports primary keys as identifiers out of the box. See
|
|
680
|
+
[the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys.
|
|
681
|
+
|
|
682
|
+
By default, batches will be compacted before processing, i.e. only the last
|
|
683
|
+
message for each unique key in a batch will actually be processed. To change
|
|
684
|
+
this behaviour, call `compacted false` inside of your consumer definition.
|
|
685
|
+
|
|
686
|
+
A sample batch consumer would look as follows:
|
|
687
|
+
|
|
688
|
+
```ruby
|
|
689
|
+
class MyConsumer < Deimos::ActiveRecordConsumer
|
|
690
|
+
schema 'MySchema'
|
|
691
|
+
key_config field: 'my_field'
|
|
692
|
+
record_class Widget
|
|
693
|
+
|
|
694
|
+
# Controls whether the batch is compacted before consuming.
|
|
695
|
+
# If true, only the last message for each unique key in a batch will be
|
|
696
|
+
# processed.
|
|
697
|
+
# If false, messages will be grouped into "slices" of independent keys
|
|
698
|
+
# and each slice will be imported separately.
|
|
699
|
+
#
|
|
700
|
+
# compacted false
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
# Optional override of the default behavior, which is to call `delete_all`
|
|
704
|
+
# on the associated records - e.g. you can replace this with setting a deleted
|
|
705
|
+
# flag on the record.
|
|
706
|
+
def remove_records(records)
|
|
707
|
+
super
|
|
708
|
+
end
|
|
709
|
+
|
|
710
|
+
# Optional override to change the attributes of the record before they
|
|
711
|
+
# are saved.
|
|
712
|
+
def record_attributes(payload, key)
|
|
713
|
+
super.merge(:some_field => 'some_value')
|
|
714
|
+
end
|
|
715
|
+
end
|
|
716
|
+
```
|
|
717
|
+
|
|
718
|
+
## Database Poller
|
|
719
|
+
|
|
720
|
+
Another method of fetching updates from the database to Kafka is by polling
|
|
721
|
+
the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)).
|
|
722
|
+
Deimos provides a database poller, which allows you the same pattern but
|
|
723
|
+
with all the flexibility of real Ruby code, and the added advantage of having
|
|
724
|
+
a single consistent framework to talk to Kafka.
|
|
725
|
+
|
|
726
|
+
One of the disadvantages of polling the database is that it can't detect deletions.
|
|
727
|
+
You can get over this by configuring a mixin to send messages *only* on deletion,
|
|
728
|
+
and use the poller to handle all other updates. You can reuse the same producer
|
|
729
|
+
for both cases to handle joins, changes/mappings, business logic, etc.
|
|
730
|
+
|
|
731
|
+
To enable the poller, generate the migration:
|
|
732
|
+
|
|
733
|
+
```ruby
|
|
734
|
+
rails g deimos:db_poller
|
|
735
|
+
```
|
|
736
|
+
|
|
737
|
+
Run the migration:
|
|
738
|
+
|
|
739
|
+
```ruby
|
|
740
|
+
rails db:migrate
|
|
741
|
+
```
|
|
742
|
+
|
|
743
|
+
Add the following configuration:
|
|
744
|
+
|
|
745
|
+
```ruby
|
|
746
|
+
Deimos.configure do
|
|
747
|
+
db_poller do
|
|
748
|
+
producer_class 'MyProducer' # an ActiveRecordProducer
|
|
749
|
+
end
|
|
750
|
+
db_poller do
|
|
751
|
+
producer_class 'MyOtherProducer'
|
|
752
|
+
run_every 2.minutes
|
|
753
|
+
delay 5.seconds # to allow for transactions to finish
|
|
754
|
+
full_table true # if set, dump the entire table every run; use for small tables
|
|
755
|
+
end
|
|
756
|
+
end
|
|
757
|
+
```
|
|
758
|
+
|
|
759
|
+
All the information around connecting and querying the database lives in the
|
|
760
|
+
producer itself, so you don't need to write any additional code. You can
|
|
761
|
+
define one additional method on the producer:
|
|
762
|
+
|
|
763
|
+
```ruby
|
|
764
|
+
class MyProducer < Deimos::ActiveRecordProducer
|
|
765
|
+
...
|
|
766
|
+
def poll_query(time_from:, time_to:, column_name:, min_id:)
|
|
767
|
+
# Default is to use the timestamp `column_name` to find all records
|
|
768
|
+
# between time_from and time_to, or records where `updated_at` is equal to
|
|
769
|
+
# `time_from` but its ID is greater than `min_id`. This is called
|
|
770
|
+
# successively as the DB is polled to ensure even if a batch ends in the
|
|
771
|
+
# middle of a timestamp, we won't miss any records.
|
|
772
|
+
# You can override or change this behavior if necessary.
|
|
773
|
+
end
|
|
774
|
+
end
|
|
775
|
+
```
|
|
776
|
+
|
|
777
|
+
To run the DB poller:
|
|
778
|
+
|
|
779
|
+
rake deimos:db_poller
|
|
780
|
+
|
|
781
|
+
Note that the DB poller creates one thread per configured poller, and is
|
|
782
|
+
currently designed *not* to be scaled out - i.e. it assumes you will only
|
|
783
|
+
have one process running at a time. If a particular poll takes longer than
|
|
784
|
+
the poll interval (i.e. interval is set at 1 minute but it takes 75 seconds)
|
|
785
|
+
the next poll will begin immediately following the first one completing.
|
|
786
|
+
|
|
787
|
+
## Running consumers
|
|
788
|
+
|
|
789
|
+
Deimos includes a rake task. Once it's in your gemfile, just run
|
|
790
|
+
|
|
791
|
+
rake deimos:start
|
|
792
|
+
|
|
793
|
+
This will automatically set an environment variable called `DEIMOS_RAKE_TASK`,
|
|
794
|
+
which can be useful if you want to figure out if you're inside the task
|
|
795
|
+
as opposed to running your Rails server or console. E.g. you could start your
|
|
796
|
+
DB backend only when your rake task is running.
|
|
797
|
+
|
|
798
|
+
# Metrics
|
|
799
|
+
|
|
800
|
+
Deimos includes some metrics reporting out of the box. It ships with DataDog support, but you can add custom metric providers as well.
|
|
801
|
+
|
|
802
|
+
The following metrics are reported:
|
|
803
|
+
* `consumer_lag` - for each partition, the number of messages
|
|
804
|
+
it's behind the tail of the partition (a gauge). This is only sent if
|
|
805
|
+
`config.consumers.report_lag` is set to true.
|
|
806
|
+
* `handler` - a count of the number of messages received. Tagged
|
|
807
|
+
with the following:
|
|
808
|
+
* `topic:{topic_name}`
|
|
809
|
+
* `status:received`
|
|
810
|
+
* `status:success`
|
|
811
|
+
* `status:error`
|
|
812
|
+
* `time:consume` (histogram)
|
|
813
|
+
* Amount of time spent executing handler for each message
|
|
814
|
+
* Batch Consumers - report counts by number of batches
|
|
815
|
+
* `status:batch_received`
|
|
816
|
+
* `status:batch_success`
|
|
817
|
+
* `status:batch_error`
|
|
818
|
+
* `time:consume_batch` (histogram)
|
|
819
|
+
* Amount of time spent executing handler for entire batch
|
|
820
|
+
* `time:time_delayed` (histogram)
|
|
821
|
+
* Indicates the amount of time between the `timestamp` property of each
|
|
822
|
+
payload (if present) and the time that the consumer started processing
|
|
823
|
+
the message.
|
|
824
|
+
* `publish` - a count of the number of messages received. Tagged
|
|
825
|
+
with `topic:{topic_name}`
|
|
826
|
+
* `publish_error` - a count of the number of messages which failed
|
|
827
|
+
to publish. Tagged with `topic:{topic_name}`
|
|
828
|
+
* `pending_db_messages_max_wait` - the number of seconds which the
|
|
829
|
+
oldest KafkaMessage in the database has been waiting for, for use
|
|
830
|
+
with the database backend. Tagged with the topic that is waiting.
|
|
831
|
+
Will send a value of 0 with no topics tagged if there are no messages
|
|
832
|
+
waiting.
|
|
833
|
+
* `db_producer.insert` - the number of messages inserted into the database
|
|
834
|
+
for publishing. Tagged with `topic:{topic_name}`
|
|
835
|
+
* `db_producer.process` - the number of DB messages processed. Note that this
|
|
836
|
+
is *not* the same as the number of messages *published* if those messages
|
|
837
|
+
are compacted. Tagged with `topic:{topic_name}`
|
|
838
|
+
|
|
839
|
+
### Configuring Metrics Providers
|
|
840
|
+
|
|
841
|
+
See the `metrics` field under [Configuration](CONFIGURATION.md).
|
|
842
|
+
View all available Metrics Providers [here](lib/deimos/metrics/metrics_providers)
|
|
843
|
+
|
|
844
|
+
### Custom Metrics Providers
|
|
845
|
+
|
|
846
|
+
Using the above configuration, it is possible to pass in any generic Metrics
|
|
847
|
+
Provider class as long as it exposes the methods and definitions expected by
|
|
848
|
+
the Metrics module.
|
|
849
|
+
The easiest way to do this is to inherit from the `Metrics::Provider` class
|
|
850
|
+
and implement the methods in it.
|
|
851
|
+
|
|
852
|
+
See the [Mock provider](lib/deimos/metrics/mock.rb) as an example. It implements a constructor which receives config, plus the required metrics methods.
|
|
853
|
+
|
|
854
|
+
Also see [deimos.rb](lib/deimos.rb) under `Configure metrics` to see how the metrics module is called.
|
|
855
|
+
|
|
856
|
+
# Tracing
|
|
857
|
+
|
|
858
|
+
Deimos also includes some tracing for kafka consumers. It ships with
|
|
859
|
+
DataDog support, but you can add custom tracing providers as well.
|
|
860
|
+
|
|
861
|
+
Trace spans are used for when incoming messages are schema-decoded, and a
|
|
862
|
+
separate span for message consume logic.
|
|
863
|
+
|
|
864
|
+
### Configuring Tracing Providers
|
|
865
|
+
|
|
866
|
+
See the `tracing` field under [Configuration](CONFIGURATION.md).
|
|
867
|
+
View all available Tracing Providers [here](lib/deimos/tracing)
|
|
868
|
+
|
|
869
|
+
### Custom Tracing Providers
|
|
870
|
+
|
|
871
|
+
Using the above configuration, it is possible to pass in any generic Tracing
|
|
872
|
+
Provider class as long as it exposes the methods and definitions expected by
|
|
873
|
+
the Tracing module.
|
|
874
|
+
The easiest way to do this is to inherit from the `Tracing::Provider` class
|
|
875
|
+
and implement the methods in it.
|
|
876
|
+
|
|
877
|
+
See the [Mock provider](lib/deimos/tracing/mock.rb) as an example. It implements a constructor which receives config, plus the required tracing methods.
|
|
878
|
+
|
|
879
|
+
Also see [deimos.rb](lib/deimos.rb) under `Configure tracing` to see how the tracing module is called.
|
|
880
|
+
|
|
881
|
+
# Testing
|
|
882
|
+
|
|
883
|
+
Deimos comes with a test helper class which sets the various backends
|
|
884
|
+
to mock versions, and provides useful methods for testing consumers.
|
|
885
|
+
|
|
886
|
+
In `spec_helper.rb`:
|
|
887
|
+
```ruby
|
|
888
|
+
RSpec.configure do |config|
|
|
889
|
+
config.include Deimos::TestHelpers
|
|
890
|
+
end
|
|
891
|
+
```
|
|
892
|
+
|
|
893
|
+
In your test, you now have the following methods available:
|
|
894
|
+
```ruby
|
|
895
|
+
# Pass a consumer class (not instance) to validate a payload against it.
|
|
896
|
+
# This will fail if the payload does not match the schema the consumer
|
|
897
|
+
# is set up to consume.
|
|
898
|
+
test_consume_message(MyConsumer,
|
|
899
|
+
{ 'some-payload' => 'some-value' }) do |payload, metadata|
|
|
900
|
+
# do some expectation handling here
|
|
901
|
+
end
|
|
902
|
+
|
|
903
|
+
# You can also pass a topic name instead of the consumer class as long
|
|
904
|
+
# as the topic is configured in your Deimos configuration:
|
|
905
|
+
test_consume_message('my-topic-name',
|
|
906
|
+
{ 'some-payload' => 'some-value' }) do |payload, metadata|
|
|
907
|
+
# do some expectation handling here
|
|
908
|
+
end
|
|
909
|
+
|
|
910
|
+
# Alternatively, you can test the actual consume logic:
|
|
911
|
+
test_consume_message(MyConsumer,
|
|
912
|
+
{ 'some-payload' => 'some-value' },
|
|
913
|
+
call_original: true)
|
|
914
|
+
|
|
915
|
+
# Test that a given payload is invalid against the schema:
|
|
916
|
+
test_consume_invalid_message(MyConsumer,
|
|
917
|
+
{ 'some-invalid-payload' => 'some-value' })
|
|
918
|
+
|
|
919
|
+
# For batch consumers, there are similar methods such as:
|
|
920
|
+
test_consume_batch(MyBatchConsumer,
|
|
921
|
+
[{ 'some-payload' => 'some-value' },
|
|
922
|
+
{ 'some-payload' => 'some-other-value' }]) do |payloads, metadata|
|
|
923
|
+
# Expectations here
|
|
924
|
+
end
|
|
925
|
+
|
|
926
|
+
## Producing
|
|
927
|
+
|
|
928
|
+
# A matcher which allows you to test that a message was sent on the given
|
|
929
|
+
# topic, without having to know which class produced it.
|
|
930
|
+
expect(topic_name).to have_sent(payload, key=nil)
|
|
931
|
+
|
|
932
|
+
# Inspect sent messages
|
|
933
|
+
message = Deimos::Backends::Test.sent_messages[0]
|
|
934
|
+
expect(message).to eq({
|
|
935
|
+
message: {'some-key' => 'some-value'},
|
|
936
|
+
topic: 'my-topic',
|
|
937
|
+
key: 'my-id'
|
|
938
|
+
})
|
|
939
|
+
```
|
|
940
|
+
|
|
941
|
+
There is also a helper method that will let you test if an existing schema
|
|
942
|
+
would be compatible with a new version of it. You can use this in your
|
|
943
|
+
Ruby console but it would likely not be part of your RSpec test:
|
|
944
|
+
|
|
945
|
+
```ruby
|
|
946
|
+
require 'deimos/test_helpers'
|
|
947
|
+
# Can pass a file path, a string or a hash into this:
|
|
948
|
+
Deimos::TestHelpers.schemas_compatible?(schema1, schema2)
|
|
949
|
+
```
|
|
950
|
+
### Test Helpers
|
|
951
|
+
|
|
952
|
+
There are helper methods available to configure Deimos for different types of testing scenarios.
|
|
953
|
+
Currently there are helpers defined for unit tests and for testing Kafka related code. You can use it as follows:
|
|
954
|
+
|
|
955
|
+
```ruby
|
|
956
|
+
# The following can be added to an rspec file so that each unit
|
|
957
|
+
# test can have the same settings every time it is run
|
|
958
|
+
around(:each) do |example|
|
|
959
|
+
Deimos::TestHelpers.unit_test!
|
|
960
|
+
example.run
|
|
961
|
+
Deimos.config.reset!
|
|
962
|
+
end
|
|
963
|
+
|
|
964
|
+
# Similarly you can use the Kafka test helper
|
|
965
|
+
around(:each) do |example|
|
|
966
|
+
Deimos::TestHelpers.kafka_test!
|
|
967
|
+
example.run
|
|
968
|
+
Deimos.config.reset!
|
|
969
|
+
end
|
|
970
|
+
|
|
971
|
+
# Kafka test helper using schema registry
|
|
972
|
+
around(:each) do |example|
|
|
973
|
+
Deimos::TestHelpers.full_integration_test!
|
|
974
|
+
example.run
|
|
975
|
+
Deimos.config.reset!
|
|
976
|
+
end
|
|
977
|
+
```
|
|
978
|
+
|
|
979
|
+
With the help of these helper methods, rspec examples can be written without having to tinker with Deimos settings.
|
|
980
|
+
This also prevents Deimos setting changes from leaking in to other examples.
|
|
981
|
+
|
|
982
|
+
This does not take away the ability to configure Deimos manually in individual examples. Deimos can still be configured like so:
|
|
983
|
+
```ruby
|
|
984
|
+
it 'should not fail this random test' do
|
|
985
|
+
|
|
986
|
+
Deimos.configure do |config|
|
|
987
|
+
config.consumers.fatal_error = proc { true }
|
|
988
|
+
config.consumers.reraise_errors = false
|
|
989
|
+
end
|
|
990
|
+
...
|
|
991
|
+
expect(some_object).to be_truthy
|
|
992
|
+
...
|
|
993
|
+
Deimos.config.reset!
|
|
994
|
+
end
|
|
995
|
+
```
|
|
996
|
+
If you are using one of the test helpers in an `around(:each)` block and want to override few settings for one example,
|
|
997
|
+
you can do it like in the example shown above. These settings would only apply to that specific example and the Deimos config should
|
|
998
|
+
reset once the example has finished running.
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
### Integration Test Helpers
|
|
1002
|
+
|
|
1003
|
+
When running integration tests, you'll want to override the default test helper settings:
|
|
1004
|
+
|
|
1005
|
+
```ruby
|
|
1006
|
+
config.before(:each, :my_integration_metadata) do
|
|
1007
|
+
Deimos.configure do
|
|
1008
|
+
producers.backend :kafka
|
|
1009
|
+
schema.backend :avro_schema_registry
|
|
1010
|
+
end
|
|
1011
|
+
end
|
|
1012
|
+
```
|
|
1013
|
+
|
|
1014
|
+
You can use the `InlineConsumer` class to help with integration testing,
|
|
1015
|
+
with a full external Kafka running.
|
|
1016
|
+
|
|
1017
|
+
If you have a consumer you want to test against messages in a Kafka topic,
|
|
1018
|
+
use the `consume` method:
|
|
1019
|
+
```ruby
|
|
1020
|
+
Deimos::Utils::InlineConsumer.consume(
|
|
1021
|
+
topic: 'my-topic',
|
|
1022
|
+
frk_consumer: MyConsumerClass,
|
|
1023
|
+
num_messages: 5
|
|
1024
|
+
)
|
|
1025
|
+
```
|
|
1026
|
+
|
|
1027
|
+
This is a _synchronous_ call which will run the consumer against the
|
|
1028
|
+
last 5 messages in the topic. You can set `num_messages` to a number
|
|
1029
|
+
like `1_000_000` to always consume all the messages. Once the last
|
|
1030
|
+
message is retrieved, the process will wait 1 second to make sure
|
|
1031
|
+
they're all done, then continue execution.
|
|
1032
|
+
|
|
1033
|
+
If you just want to retrieve the contents of a topic, you can use
|
|
1034
|
+
the `get_messages_for` method:
|
|
1035
|
+
|
|
1036
|
+
```ruby
|
|
1037
|
+
Deimos::Utils::InlineConsumer.get_messages_for(
|
|
1038
|
+
topic: 'my-topic',
|
|
1039
|
+
schema: 'my-schema',
|
|
1040
|
+
namespace: 'my.namespace',
|
|
1041
|
+
key_config: { field: 'id' },
|
|
1042
|
+
num_messages: 5
|
|
1043
|
+
)
|
|
1044
|
+
```
|
|
1045
|
+
|
|
1046
|
+
This will run the process and simply return the last 5 messages on the
|
|
1047
|
+
topic, as hashes, once it's done. The format of the messages will simply
|
|
1048
|
+
be
|
|
1049
|
+
```ruby
|
|
1050
|
+
{
|
|
1051
|
+
payload: { key: value }, # payload hash here
|
|
1052
|
+
key: "some_value" # key value or hash here
|
|
1053
|
+
}
|
|
1054
|
+
```
|
|
1055
|
+
|
|
1056
|
+
Both payload and key will be schema-decoded as necessary according to the
|
|
1057
|
+
key config.
|
|
1058
|
+
|
|
1059
|
+
You can also just pass an existing producer or consumer class into the method,
|
|
1060
|
+
and it will extract the necessary configuration from it:
|
|
1061
|
+
|
|
1062
|
+
```ruby
|
|
1063
|
+
Deimos::Utils::InlineConsumer.get_messages_for(
|
|
1064
|
+
topic: 'my-topic',
|
|
1065
|
+
config_class: MyProducerClass,
|
|
1066
|
+
num_messages: 5
|
|
1067
|
+
)
|
|
1068
|
+
```
|
|
1069
|
+
|
|
1070
|
+
## Utilities
|
|
1071
|
+
|
|
1072
|
+
You can use your configured schema backend directly if you want to
|
|
1073
|
+
encode and decode payloads outside of the context of sending messages.
|
|
1074
|
+
|
|
1075
|
+
```ruby
|
|
1076
|
+
backend = Deimos.schema_backend(schema: 'MySchema', namespace: 'com.my-namespace')
|
|
1077
|
+
encoded = backend.encode(my_payload)
|
|
1078
|
+
decoded = backend.decode(my_encoded_payload)
|
|
1079
|
+
coerced = backend.coerce(my_payload) # coerce to correct types
|
|
1080
|
+
backend.validate(my_payload) # throws an error if not valid
|
|
1081
|
+
fields = backend.schema_fields # list of fields defined in the schema
|
|
1082
|
+
```
|
|
1083
|
+
|
|
1084
|
+
## Contributing
|
|
1085
|
+
|
|
1086
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos .
|
|
1087
|
+
|
|
1088
|
+
### Linting
|
|
1089
|
+
|
|
1090
|
+
Deimos uses Rubocop to lint the code. Please run Rubocop on your code
|
|
1091
|
+
before submitting a PR.
|
|
1092
|
+
|
|
1093
|
+
---
|
|
1094
|
+
<p align="center">
|
|
1095
|
+
Sponsored by<br/>
|
|
1096
|
+
<a href="https://corp.flipp.com/">
|
|
1097
|
+
<img src="support/flipp-logo.png" title="Flipp logo" style="border:none;"/>
|
|
1098
|
+
</a>
|
|
1099
|
+
</p>
|