deimos-temp-fork 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/config.yml +83 -0
- data/.gitignore +41 -0
- data/.gitmodules +0 -0
- data/.rspec +1 -0
- data/.rubocop.yml +333 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +349 -0
- data/CODE_OF_CONDUCT.md +77 -0
- data/Dockerfile +23 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +286 -0
- data/Guardfile +22 -0
- data/LICENSE.md +195 -0
- data/README.md +1099 -0
- data/Rakefile +13 -0
- data/bin/deimos +4 -0
- data/deimos-ruby.gemspec +44 -0
- data/docker-compose.yml +71 -0
- data/docs/ARCHITECTURE.md +140 -0
- data/docs/CONFIGURATION.md +236 -0
- data/docs/DATABASE_BACKEND.md +147 -0
- data/docs/INTEGRATION_TESTS.md +52 -0
- data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
- data/docs/UPGRADING.md +128 -0
- data/lib/deimos-temp-fork.rb +95 -0
- data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +67 -0
- data/lib/deimos/active_record_producer.rb +87 -0
- data/lib/deimos/backends/base.rb +32 -0
- data/lib/deimos/backends/db.rb +41 -0
- data/lib/deimos/backends/kafka.rb +33 -0
- data/lib/deimos/backends/kafka_async.rb +33 -0
- data/lib/deimos/backends/test.rb +20 -0
- data/lib/deimos/batch_consumer.rb +7 -0
- data/lib/deimos/config/configuration.rb +381 -0
- data/lib/deimos/config/phobos_config.rb +137 -0
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +104 -0
- data/lib/deimos/instrumentation.rb +76 -0
- data/lib/deimos/kafka_message.rb +60 -0
- data/lib/deimos/kafka_source.rb +128 -0
- data/lib/deimos/kafka_topic_info.rb +102 -0
- data/lib/deimos/message.rb +79 -0
- data/lib/deimos/metrics/datadog.rb +47 -0
- data/lib/deimos/metrics/mock.rb +39 -0
- data/lib/deimos/metrics/provider.rb +36 -0
- data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
- data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/producer.rb +224 -0
- data/lib/deimos/railtie.rb +8 -0
- data/lib/deimos/schema_backends/avro_base.rb +140 -0
- data/lib/deimos/schema_backends/avro_local.rb +30 -0
- data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
- data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
- data/lib/deimos/schema_backends/avro_validation.rb +21 -0
- data/lib/deimos/schema_backends/base.rb +150 -0
- data/lib/deimos/schema_backends/mock.rb +42 -0
- data/lib/deimos/shared_config.rb +63 -0
- data/lib/deimos/test_helpers.rb +360 -0
- data/lib/deimos/tracing/datadog.rb +35 -0
- data/lib/deimos/tracing/mock.rb +40 -0
- data/lib/deimos/tracing/provider.rb +29 -0
- data/lib/deimos/utils/db_poller.rb +150 -0
- data/lib/deimos/utils/db_producer.rb +243 -0
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/inline_consumer.rb +150 -0
- data/lib/deimos/utils/lag_reporter.rb +175 -0
- data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
- data/lib/deimos/version.rb +5 -0
- data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
- data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
- data/lib/generators/deimos/active_record_generator.rb +79 -0
- data/lib/generators/deimos/db_backend/templates/migration +25 -0
- data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
- data/lib/generators/deimos/db_backend_generator.rb +48 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +34 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +154 -0
- data/spec/active_record_producer_spec.rb +85 -0
- data/spec/backends/base_spec.rb +10 -0
- data/spec/backends/db_spec.rb +54 -0
- data/spec/backends/kafka_async_spec.rb +11 -0
- data/spec/backends/kafka_spec.rb +11 -0
- data/spec/batch_consumer_spec.rb +256 -0
- data/spec/config/configuration_spec.rb +248 -0
- data/spec/consumer_spec.rb +209 -0
- data/spec/deimos_spec.rb +169 -0
- data/spec/generators/active_record_generator_spec.rb +56 -0
- data/spec/handlers/my_batch_consumer.rb +10 -0
- data/spec/handlers/my_consumer.rb +10 -0
- data/spec/kafka_listener_spec.rb +55 -0
- data/spec/kafka_source_spec.rb +381 -0
- data/spec/kafka_topic_info_spec.rb +111 -0
- data/spec/message_spec.rb +19 -0
- data/spec/phobos.bad_db.yml +73 -0
- data/spec/phobos.yml +77 -0
- data/spec/producer_spec.rb +498 -0
- data/spec/rake_spec.rb +19 -0
- data/spec/schema_backends/avro_base_shared.rb +199 -0
- data/spec/schema_backends/avro_local_spec.rb +32 -0
- data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
- data/spec/schema_backends/avro_validation_spec.rb +24 -0
- data/spec/schema_backends/base_spec.rb +33 -0
- data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
- data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
- data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
- data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
- data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
- data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
- data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
- data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
- data/spec/spec_helper.rb +267 -0
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/db_producer_spec.rb +514 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/inline_consumer_spec.rb +31 -0
- data/spec/utils/lag_reporter_spec.rb +76 -0
- data/spec/utils/platform_schema_validation_spec.rb +0 -0
- data/spec/utils/schema_controller_mixin_spec.rb +84 -0
- data/support/deimos-solo.png +0 -0
- data/support/deimos-with-name-next.png +0 -0
- data/support/deimos-with-name.png +0 -0
- data/support/flipp-logo.png +0 -0
- metadata +551 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
# Database Backend Design
|
2
|
+
|
3
|
+
Kafka is a messaging protocol, while databases are transactional and relational.
|
4
|
+
Marrying the two (e.g. by using Kafka to publish changes to a database table)
|
5
|
+
is not a simple task. This document describes the problem and the current
|
6
|
+
implementation. There will be references to microservices architecture as that
|
7
|
+
informs some of the discussion.
|
8
|
+
|
9
|
+
## A Pure Solution
|
10
|
+
|
11
|
+
The purest solution is to use Kafka as the "source of truth" by first publishing
|
12
|
+
all messages synchronously to Kafka, and then using a consumer to read these
|
13
|
+
messages back into the database. If there are any errors sending to Kafka,
|
14
|
+
the thread will crash and no data would be written. If there are errors
|
15
|
+
reading the data back into the database, the data remains in Kafka and can
|
16
|
+
be re-read at any time.
|
17
|
+
|
18
|
+
There are several real-world problems with this pure solution:
|
19
|
+
|
20
|
+
1. The solution assumes that creating a consumer is a simple task, which is
|
21
|
+
definitely not the case. Depending on how many topics are being produced to,
|
22
|
+
a separate consumer thread per topic (which is how Phobos works) is overkill.
|
23
|
+
The other option is to introduce an entirely new consumer service to handle
|
24
|
+
consuming the topics we've already produced, which is even more overkill.
|
25
|
+
2. For CRUD interfaces or any other UI that saves data to the database, we do
|
26
|
+
not want to use an asynchronous method to ensure that data is published to
|
27
|
+
Kafka before saving, and then be able to serve that data back to the user
|
28
|
+
in a single API call, which is a common use case.
|
29
|
+
This could involve a large amount of added complexity and may force the user
|
30
|
+
to wait unnecessarily.
|
31
|
+
3. We want to make use of database transactions - i.e. if an error happens
|
32
|
+
saving one record, the others should roll back. Once a message is written to
|
33
|
+
Kafka, it can't be "rolled back" easily. Kafka transactions do exist but
|
34
|
+
they are not widely supported in Kafka clients, and we still would be
|
35
|
+
faced with the fact that one transaction (Kafka or database) could finish
|
36
|
+
and then the process could be killed before the other one could be finished.
|
37
|
+
4. We want to make use of auto-increment database IDs - we can't do this if we
|
38
|
+
write to Kafka first.
|
39
|
+
5. Kafka is an external dependency. If either the DB **or** Kafka goes down,
|
40
|
+
our app becomes unusable.
|
41
|
+
|
42
|
+
Using tools like Kafka Connect and Debezium are not ideal because:
|
43
|
+
|
44
|
+
1. They are tied very closely to the internal relational schema, which is not
|
45
|
+
ideal, especially for legacy systems. It makes it nearly impossible to make
|
46
|
+
internal changes.
|
47
|
+
2. They are separate services and connectors must be created for each
|
48
|
+
microservice separately, which is a large overhead.
|
49
|
+
|
50
|
+
## Database Backend Solution
|
51
|
+
|
52
|
+
We will be using the database itself as the source of our Kafka messages.
|
53
|
+
We will first write our messages to a database table and then asynchronously
|
54
|
+
send those messages to Kafka. This solves our problems:
|
55
|
+
|
56
|
+
1. The database is the (interim) source of truth. The Kafka message log is
|
57
|
+
essentially the changelog, which we can tail and send out. If our producing
|
58
|
+
thread errors out, a new one will simply pick up where it left off.
|
59
|
+
This ensures eventual consistency.
|
60
|
+
2. Because we are only using the database in the main application thread, we do
|
61
|
+
not need to wait for Kafka production to continue and can return immediately.
|
62
|
+
3. Because we are only saving to the database, we can use transactions normally
|
63
|
+
- if a transaction fails, it will roll back along with any Kafka messages we
|
64
|
+
intended to send.
|
65
|
+
4. Records are saved normally and messages are created after that, all as part
|
66
|
+
of the transaction, so we can use database IDs as usual.
|
67
|
+
5. We remove Kafka entirely as a dependency for normal work - the Kafka sending
|
68
|
+
piece is a separate thread.
|
69
|
+
|
70
|
+
The one downside to this is a slight delay (generally less than 1 second)
|
71
|
+
between the message being written to the database and sent to Kafka - in most
|
72
|
+
cases this is an acceptable limitation.
|
73
|
+
|
74
|
+
### The Implementation
|
75
|
+
|
76
|
+
The database backend consists of two tables:
|
77
|
+
|
78
|
+
* `kafka_messages` - this keeps track of the messages that were "published",
|
79
|
+
including the payload, topic, key and partition key. These messages
|
80
|
+
are *raw data* - all processing, including schema-encoding, must happen
|
81
|
+
upstream before they are inserted.
|
82
|
+
* `kafka_topic_info` - this table is essentially a lock table used to ensure
|
83
|
+
that only one producer thread is ever "working" on a topic at a time.
|
84
|
+
|
85
|
+
The backend code structure is such that when a producer calls `publish_list`,
|
86
|
+
it delegates that logic to the configured backend. A backend of `kafka`
|
87
|
+
or `kafka_async` will use existing Phobos logic. A backend of `db` will use
|
88
|
+
the database backend instead.
|
89
|
+
|
90
|
+
### "Publishing" A Message
|
91
|
+
|
92
|
+
When `publish_list` is called when the database backend is configured,
|
93
|
+
Deimos will instead save the message to the `kafka_messages` table.
|
94
|
+
|
95
|
+
### Sending Messages to Kafka
|
96
|
+
|
97
|
+
The database executor is started by calling `Deimos.start_db_backend!`
|
98
|
+
with a specified number of threads. These threads will continually scan the
|
99
|
+
`kafka_messages` table and send the messages to Kafka.
|
100
|
+
|
101
|
+
The algorithm for sending the messages makes use of the `kafka_topic_info` table as a lock table. There is also an `error` boolean column which is used to track when a topic has errored out. When this happens, the topic is marked as errored and will not be picked up for the next minute, after which it will be treated as any other topic. The full algorithm is as follows:
|
102
|
+
|
103
|
+
* Create a UUID for the thread - this is created once on thread start.
|
104
|
+
* Find all unique topics in the `kafka_messages` table.
|
105
|
+
* For each topic:
|
106
|
+
* Create an entry in `kafka_topic_info` for this topic if it doesn't exist.
|
107
|
+
* Run the following query:
|
108
|
+
|
109
|
+
```sql
|
110
|
+
UPDATE kafka_topic_info
|
111
|
+
SET locked_by=#{uuid}, locked_at=NOW(), error=0
|
112
|
+
WHERE (locked_by IS NULL AND error=0) OR locked_at < #{1.minute.ago}
|
113
|
+
LIMIT 1
|
114
|
+
```
|
115
|
+
* If the lock was unsuccessful, move on to the next topic in the list
|
116
|
+
* Find the first 1000 messages in `kafka_messages` for that topic, ordered by ID (insertion order)
|
117
|
+
* Send the messages synchronously to Kafka, with all brokers acking the message.
|
118
|
+
* Delete the records from the DB
|
119
|
+
* Update the `locked_at` timestamp in `kafka_topic_info` to `NOW()` to ensure liveness in case a particular batch took longer than expected to send.
|
120
|
+
* If the current batch is 1000 messages, repeat with the next batch of
|
121
|
+
messages until it returns less than 1000
|
122
|
+
* When all batches are sent:
|
123
|
+
* Unlock the topic by updating the `kafka_topic_info` for this topic, setting `locked_by=NULL, locked_at=NULL, error=0, retries=0`
|
124
|
+
* Move on to the next topic
|
125
|
+
* If there are errors sending a batch:
|
126
|
+
* Update the `kafka_topic_info` for this topic to have `locked_by=NULL, locked_at=NULL, error=1, retries=retries+1` - this will effectively keep it
|
127
|
+
locked for the next minute
|
128
|
+
* Move on to the next topic.
|
129
|
+
* When all topics are done, or if there are no topics, sleep for 0.5 seconds and begin again.
|
130
|
+
|
131
|
+
### Class / Method Design
|
132
|
+
|
133
|
+
The algorithm is split up into the following classes:
|
134
|
+
|
135
|
+
* Backends::Db - this is the class that saves the message to the database.
|
136
|
+
* KafkaMessage: This is an ActiveRecord class that handles saving the messages to the database and querying them.
|
137
|
+
* KafkaTopicInfo: This is an ActiveRecord class that handles locking, unlocking and heartbeating.
|
138
|
+
* Utils::SignalHandler: This is the equivalent of Phobos's Runner class and
|
139
|
+
handles the KILL, INT and TERM signals to gracefully shut down the threads.
|
140
|
+
* Utils::Executor is the equivalent of Phobos's Executor class and handles
|
141
|
+
the thread pool of producer threads.
|
142
|
+
* Utils::DbProducer is the producer thread itself which implements most of the
|
143
|
+
algorithm listed above.
|
144
|
+
|
145
|
+
### Caveats
|
146
|
+
|
147
|
+
There is one disadvantage of this pattern, which is that it is possible for events to be sent multiple times if the thread which sent the messages dies before being able to delete it from the database. In general this is an acceptable effect, since Kafka only guarantees at-least-once delivery in any case.
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Running Integration Tests
|
2
|
+
|
3
|
+
This repo includes integration tests in the [spec/utils](spec/utils) directory.
|
4
|
+
Here, there are tests for more deimos features that include a database integration like
|
5
|
+
* [Database Poller](README.md#database-poller)
|
6
|
+
* [Database Backend](docs/DATABASE_BACKEND.md)
|
7
|
+
* [Deadlock Retrying](lib/deimos/utils/deadlock_retry.rb)
|
8
|
+
|
9
|
+
You will need to set up the following databases to develop and create unit tests in these test suites.
|
10
|
+
* [SQLite](#SQLite)
|
11
|
+
* [MySQL](#MySQL)
|
12
|
+
* [PostgreSQL](#PostgreSQL)
|
13
|
+
|
14
|
+
### SQLite
|
15
|
+
This database is covered through the `sqlite3` gem.
|
16
|
+
|
17
|
+
## MySQL
|
18
|
+
### Setting up a local MySQL server (Mac)
|
19
|
+
```bash
|
20
|
+
# Download MySQL (Optionally, choose a version you are comfortable with)
|
21
|
+
brew install mysql
|
22
|
+
# Start automatically after rebooting your machine
|
23
|
+
brew services start mysql
|
24
|
+
|
25
|
+
# Cleanup once you are done with MySQL
|
26
|
+
brew services stop mysql
|
27
|
+
```
|
28
|
+
|
29
|
+
## PostgreSQL
|
30
|
+
### Setting up a local PostgreSQL server (Mac)
|
31
|
+
```bash
|
32
|
+
# Install postgres if it's not already installed
|
33
|
+
brew install postgres
|
34
|
+
|
35
|
+
# Initialize and Start up postgres db
|
36
|
+
brew services start postgres
|
37
|
+
initdb /usr/local/var/postgres
|
38
|
+
# Create the default database and user
|
39
|
+
# Use the password "root"
|
40
|
+
createuser -s --password postgres
|
41
|
+
|
42
|
+
# Cleanup once done with Postgres
|
43
|
+
killall postgres
|
44
|
+
brew services stop postgres
|
45
|
+
```
|
46
|
+
|
47
|
+
## Running Integration Tests
|
48
|
+
You must specify the tag "integration" when running these test suites.
|
49
|
+
This can be done through the CLI with the `--tag integration` argument.
|
50
|
+
```bash
|
51
|
+
rspec spec/utils/ --tag integration
|
52
|
+
```
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Pull Request Template
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
|
6
|
+
|
7
|
+
Fixes # (issue)
|
8
|
+
|
9
|
+
## Type of change
|
10
|
+
|
11
|
+
Please delete options that are not relevant.
|
12
|
+
|
13
|
+
- [ ] Bug fix (non-breaking change which fixes an issue)
|
14
|
+
- [ ] New feature (non-breaking change which adds functionality)
|
15
|
+
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
|
16
|
+
- [ ] This change requires a documentation update
|
17
|
+
|
18
|
+
## How Has This Been Tested?
|
19
|
+
|
20
|
+
Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration
|
21
|
+
|
22
|
+
- [ ] Test A
|
23
|
+
- [ ] Test B
|
24
|
+
|
25
|
+
## Checklist:
|
26
|
+
|
27
|
+
- [ ] My code follows the style guidelines of this project
|
28
|
+
- [ ] I have performed a self-review of my own code
|
29
|
+
- [ ] I have commented my code, particularly in hard-to-understand areas
|
30
|
+
- [ ] I have made corresponding changes to the documentation
|
31
|
+
- [ ] I have added a line in the CHANGELOG describing this change, under the UNRELEASED heading
|
32
|
+
- [ ] My changes generate no new warnings
|
33
|
+
- [ ] I have added tests that prove my fix is effective or that my feature works
|
34
|
+
- [ ] New and existing unit tests pass locally with my changes
|
35
|
+
- [ ] Any dependent changes have been merged and published in downstream modules
|
data/docs/UPGRADING.md
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# Upgrading Deimos
|
2
|
+
|
3
|
+
## Upgrading from < 1.5.0 to >= 1.5.0
|
4
|
+
|
5
|
+
If you are using Confluent's schema registry to Avro-encode your
|
6
|
+
messages, you will need to manually include the `avro_turf` gem
|
7
|
+
in your Gemfile now.
|
8
|
+
|
9
|
+
This update changes how to interact with Deimos's schema classes.
|
10
|
+
Although these are meant to be internal, they are still "public"
|
11
|
+
and can be used by calling code.
|
12
|
+
|
13
|
+
Before 1.5.0:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
encoder = Deimos::AvroDataEncoder.new(schema: 'MySchema',
|
17
|
+
namespace: 'com.my-namespace')
|
18
|
+
encoder.encode(my_payload)
|
19
|
+
|
20
|
+
decoder = Deimos::AvroDataDecoder.new(schema: 'MySchema',
|
21
|
+
namespace: 'com.my-namespace')
|
22
|
+
decoder.decode(my_payload)
|
23
|
+
```
|
24
|
+
|
25
|
+
After 1.5.0:
|
26
|
+
```ruby
|
27
|
+
backend = Deimos.schema_backend(schema: 'MySchema', namespace: 'com.my-namespace')
|
28
|
+
backend.encode(my_payload)
|
29
|
+
backend.decode(my_payload)
|
30
|
+
```
|
31
|
+
|
32
|
+
The two classes are different and if you are using them to e.g.
|
33
|
+
inspect Avro schema fields, please look at the source code for the following:
|
34
|
+
* `Deimos::SchemaBackends::Base`
|
35
|
+
* `Deimos::SchemaBackends::AvroBase`
|
36
|
+
* `Deimos::SchemaBackends::AvroSchemaRegistry`
|
37
|
+
|
38
|
+
Deprecated `Deimos::TestHelpers.sent_messages` in favor of
|
39
|
+
`Deimos::Backends::Test.sent_messages`.
|
40
|
+
|
41
|
+
## Upgrading from < 1.4.0 to >= 1.4.0
|
42
|
+
|
43
|
+
Previously, configuration was handled as follows:
|
44
|
+
* Kafka configuration, including listeners, lived in `phobos.yml`
|
45
|
+
* Additional Deimos configuration would live in an initializer, e.g. `kafka.rb`
|
46
|
+
* Producer and consumer configuration lived in each individual producer and consumer
|
47
|
+
|
48
|
+
As of 1.4.0, all configuration is centralized in one initializer
|
49
|
+
file, using default configuration.
|
50
|
+
|
51
|
+
Before 1.4.0:
|
52
|
+
```yaml
|
53
|
+
# config/phobos.yml
|
54
|
+
logger:
|
55
|
+
file: log/phobos.log
|
56
|
+
level: debug
|
57
|
+
ruby_kafka:
|
58
|
+
level: debug
|
59
|
+
|
60
|
+
kafka:
|
61
|
+
client_id: phobos
|
62
|
+
connect_timeout: 15
|
63
|
+
socket_timeout: 15
|
64
|
+
|
65
|
+
producer:
|
66
|
+
ack_timeout: 5
|
67
|
+
required_acks: :all
|
68
|
+
...
|
69
|
+
|
70
|
+
listeners:
|
71
|
+
- handler: ConsumerTest::MyConsumer
|
72
|
+
topic: my_consume_topic
|
73
|
+
group_id: my_group_id
|
74
|
+
- handler: ConsumerTest::MyBatchConsumer
|
75
|
+
topic: my_batch_consume_topic
|
76
|
+
group_id: my_batch_group_id
|
77
|
+
delivery: inline_batch
|
78
|
+
```
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
# kafka.rb
|
82
|
+
Deimos.configure do |config|
|
83
|
+
config.reraise_consumer_errors = true
|
84
|
+
config.logger = Rails.logger
|
85
|
+
...
|
86
|
+
end
|
87
|
+
|
88
|
+
# my_consumer.rb
|
89
|
+
class ConsumerTest::MyConsumer < Deimos::Producer
|
90
|
+
namespace 'com.my-namespace'
|
91
|
+
schema 'MySchema'
|
92
|
+
topic 'MyTopic'
|
93
|
+
key_config field: :id
|
94
|
+
end
|
95
|
+
```
|
96
|
+
|
97
|
+
After 1.4.0:
|
98
|
+
```ruby
|
99
|
+
# kafka.rb
|
100
|
+
Deimos.configure do
|
101
|
+
logger Rails.logger
|
102
|
+
kafka do
|
103
|
+
client_id 'phobos'
|
104
|
+
connect_timeout 15
|
105
|
+
socket_timeout 15
|
106
|
+
end
|
107
|
+
producers.ack_timeout 5
|
108
|
+
producers.required_acks :all
|
109
|
+
...
|
110
|
+
consumer do
|
111
|
+
class_name 'ConsumerTest::MyConsumer'
|
112
|
+
topic 'my_consume_topic'
|
113
|
+
group_id 'my_group_id'
|
114
|
+
namespace 'com.my-namespace'
|
115
|
+
schema 'MySchema'
|
116
|
+
topic 'MyTopic'
|
117
|
+
key_config field: :id
|
118
|
+
end
|
119
|
+
...
|
120
|
+
end
|
121
|
+
```
|
122
|
+
|
123
|
+
Note that the old configuration way *will* work if you set
|
124
|
+
`config.phobos_config_file = "config/phobos.yml"`. You will
|
125
|
+
get a number of deprecation notices, however. You can also still
|
126
|
+
set the topic, namespace, etc. on the producer/consumer class,
|
127
|
+
but it's much more convenient to centralize these configs
|
128
|
+
in one place to see what your app does.
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'active_support'
|
4
|
+
|
5
|
+
require 'phobos'
|
6
|
+
require 'deimos/version'
|
7
|
+
require 'deimos/config/configuration'
|
8
|
+
require 'deimos/producer'
|
9
|
+
require 'deimos/active_record_producer'
|
10
|
+
require 'deimos/active_record_consumer'
|
11
|
+
require 'deimos/consumer'
|
12
|
+
require 'deimos/batch_consumer'
|
13
|
+
require 'deimos/instrumentation'
|
14
|
+
require 'deimos/utils/lag_reporter'
|
15
|
+
|
16
|
+
require 'deimos/backends/base'
|
17
|
+
require 'deimos/backends/kafka'
|
18
|
+
require 'deimos/backends/kafka_async'
|
19
|
+
require 'deimos/backends/test'
|
20
|
+
|
21
|
+
require 'deimos/schema_backends/base'
|
22
|
+
|
23
|
+
require 'deimos/monkey_patches/phobos_producer'
|
24
|
+
require 'deimos/monkey_patches/phobos_cli'
|
25
|
+
|
26
|
+
require 'deimos/railtie' if defined?(Rails)
|
27
|
+
require 'deimos/utils/schema_controller_mixin' if defined?(ActionController)
|
28
|
+
|
29
|
+
if defined?(ActiveRecord)
|
30
|
+
require 'deimos/kafka_source'
|
31
|
+
require 'deimos/kafka_topic_info'
|
32
|
+
require 'deimos/backends/db'
|
33
|
+
require 'sigurd/signal_handler'
|
34
|
+
require 'sigurd/executor'
|
35
|
+
require 'deimos/utils/db_producer'
|
36
|
+
require 'deimos/utils/db_poller'
|
37
|
+
end
|
38
|
+
|
39
|
+
require 'deimos/utils/inline_consumer'
|
40
|
+
require 'yaml'
|
41
|
+
require 'erb'
|
42
|
+
|
43
|
+
# Parent module.
module Deimos
  class << self
    # Look up, require, and return the schema backend class named in the
    # Deimos configuration (e.g. :avro_local -> Deimos::SchemaBackends::AvroLocal).
    # @return [Class < Deimos::SchemaBackends::Base]
    def schema_backend_class
      backend_name = Deimos.config.schema.backend.to_s
      require "deimos/schema_backends/#{backend_name}"
      "Deimos::SchemaBackends::#{backend_name.classify}".constantize
    end

    # Instantiate the configured schema backend for a particular schema.
    # @param schema [String|Symbol]
    # @param namespace [String]
    # @return [Deimos::SchemaBackends::Base]
    def schema_backend(schema:, namespace:)
      schema_backend_class.new(schema: schema, namespace: namespace)
    end

    # Start the DB producers to send Kafka messages.
    # Spawns `thread_count` DbProducer workers under a Sigurd executor and
    # blocks in the signal handler until the process is told to shut down.
    # @param thread_count [Integer] the number of threads to start.
    def start_db_backend!(thread_count: 1)
      raise('Publish backend is not set to :db, exiting') if self.config.producers.backend != :db
      raise('Thread count is not given or set to zero, exiting') if thread_count.nil? || thread_count.zero?

      # Per-producer logger falls back to the global Deimos logger.
      worker_logger = self.config.db_producer.logger || self.config.logger
      producers = Array.new(thread_count) { Deimos::Utils::DbProducer.new(worker_logger) }
      executor = Sigurd::Executor.new(producers,
                                      sleep_seconds: 5,
                                      logger: self.config.logger)
      Sigurd::SignalHandler.new(executor).run!
    end
  end
end
|
85
|
+
|
86
|
+
# On process exit, shut down both Kafka producer backends so buffered
# messages are flushed. A failure here is logged rather than raised --
# nothing useful can be done about it during interpreter teardown.
at_exit do
  begin
    Deimos::Backends::KafkaAsync.shutdown_producer
    Deimos::Backends::Kafka.shutdown_producer
  rescue StandardError => e
    Deimos.config.logger.error(
      "Error closing producer on shutdown: #{e.message} #{e.backtrace.join("\n")}"
    )
  end
end
|