deimos-temp-fork 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/config.yml +83 -0
- data/.gitignore +41 -0
- data/.gitmodules +0 -0
- data/.rspec +1 -0
- data/.rubocop.yml +333 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +349 -0
- data/CODE_OF_CONDUCT.md +77 -0
- data/Dockerfile +23 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +286 -0
- data/Guardfile +22 -0
- data/LICENSE.md +195 -0
- data/README.md +1099 -0
- data/Rakefile +13 -0
- data/bin/deimos +4 -0
- data/deimos-ruby.gemspec +44 -0
- data/docker-compose.yml +71 -0
- data/docs/ARCHITECTURE.md +140 -0
- data/docs/CONFIGURATION.md +236 -0
- data/docs/DATABASE_BACKEND.md +147 -0
- data/docs/INTEGRATION_TESTS.md +52 -0
- data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
- data/docs/UPGRADING.md +128 -0
- data/lib/deimos-temp-fork.rb +95 -0
- data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
- data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
- data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
- data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
- data/lib/deimos/active_record_consumer.rb +67 -0
- data/lib/deimos/active_record_producer.rb +87 -0
- data/lib/deimos/backends/base.rb +32 -0
- data/lib/deimos/backends/db.rb +41 -0
- data/lib/deimos/backends/kafka.rb +33 -0
- data/lib/deimos/backends/kafka_async.rb +33 -0
- data/lib/deimos/backends/test.rb +20 -0
- data/lib/deimos/batch_consumer.rb +7 -0
- data/lib/deimos/config/configuration.rb +381 -0
- data/lib/deimos/config/phobos_config.rb +137 -0
- data/lib/deimos/consume/batch_consumption.rb +150 -0
- data/lib/deimos/consume/message_consumption.rb +94 -0
- data/lib/deimos/consumer.rb +104 -0
- data/lib/deimos/instrumentation.rb +76 -0
- data/lib/deimos/kafka_message.rb +60 -0
- data/lib/deimos/kafka_source.rb +128 -0
- data/lib/deimos/kafka_topic_info.rb +102 -0
- data/lib/deimos/message.rb +79 -0
- data/lib/deimos/metrics/datadog.rb +47 -0
- data/lib/deimos/metrics/mock.rb +39 -0
- data/lib/deimos/metrics/provider.rb +36 -0
- data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
- data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
- data/lib/deimos/poll_info.rb +9 -0
- data/lib/deimos/producer.rb +224 -0
- data/lib/deimos/railtie.rb +8 -0
- data/lib/deimos/schema_backends/avro_base.rb +140 -0
- data/lib/deimos/schema_backends/avro_local.rb +30 -0
- data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
- data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
- data/lib/deimos/schema_backends/avro_validation.rb +21 -0
- data/lib/deimos/schema_backends/base.rb +150 -0
- data/lib/deimos/schema_backends/mock.rb +42 -0
- data/lib/deimos/shared_config.rb +63 -0
- data/lib/deimos/test_helpers.rb +360 -0
- data/lib/deimos/tracing/datadog.rb +35 -0
- data/lib/deimos/tracing/mock.rb +40 -0
- data/lib/deimos/tracing/provider.rb +29 -0
- data/lib/deimos/utils/db_poller.rb +150 -0
- data/lib/deimos/utils/db_producer.rb +243 -0
- data/lib/deimos/utils/deadlock_retry.rb +68 -0
- data/lib/deimos/utils/inline_consumer.rb +150 -0
- data/lib/deimos/utils/lag_reporter.rb +175 -0
- data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
- data/lib/deimos/version.rb +5 -0
- data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
- data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
- data/lib/generators/deimos/active_record_generator.rb +79 -0
- data/lib/generators/deimos/db_backend/templates/migration +25 -0
- data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
- data/lib/generators/deimos/db_backend_generator.rb +48 -0
- data/lib/generators/deimos/db_poller/templates/migration +11 -0
- data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
- data/lib/generators/deimos/db_poller_generator.rb +48 -0
- data/lib/tasks/deimos.rake +34 -0
- data/spec/active_record_batch_consumer_spec.rb +481 -0
- data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
- data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
- data/spec/active_record_consumer_spec.rb +154 -0
- data/spec/active_record_producer_spec.rb +85 -0
- data/spec/backends/base_spec.rb +10 -0
- data/spec/backends/db_spec.rb +54 -0
- data/spec/backends/kafka_async_spec.rb +11 -0
- data/spec/backends/kafka_spec.rb +11 -0
- data/spec/batch_consumer_spec.rb +256 -0
- data/spec/config/configuration_spec.rb +248 -0
- data/spec/consumer_spec.rb +209 -0
- data/spec/deimos_spec.rb +169 -0
- data/spec/generators/active_record_generator_spec.rb +56 -0
- data/spec/handlers/my_batch_consumer.rb +10 -0
- data/spec/handlers/my_consumer.rb +10 -0
- data/spec/kafka_listener_spec.rb +55 -0
- data/spec/kafka_source_spec.rb +381 -0
- data/spec/kafka_topic_info_spec.rb +111 -0
- data/spec/message_spec.rb +19 -0
- data/spec/phobos.bad_db.yml +73 -0
- data/spec/phobos.yml +77 -0
- data/spec/producer_spec.rb +498 -0
- data/spec/rake_spec.rb +19 -0
- data/spec/schema_backends/avro_base_shared.rb +199 -0
- data/spec/schema_backends/avro_local_spec.rb +32 -0
- data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
- data/spec/schema_backends/avro_validation_spec.rb +24 -0
- data/spec/schema_backends/base_spec.rb +33 -0
- data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
- data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
- data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
- data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
- data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
- data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
- data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
- data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
- data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
- data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
- data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
- data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
- data/spec/spec_helper.rb +267 -0
- data/spec/utils/db_poller_spec.rb +320 -0
- data/spec/utils/db_producer_spec.rb +514 -0
- data/spec/utils/deadlock_retry_spec.rb +74 -0
- data/spec/utils/inline_consumer_spec.rb +31 -0
- data/spec/utils/lag_reporter_spec.rb +76 -0
- data/spec/utils/platform_schema_validation_spec.rb +0 -0
- data/spec/utils/schema_controller_mixin_spec.rb +84 -0
- data/support/deimos-solo.png +0 -0
- data/support/deimos-with-name-next.png +0 -0
- data/support/deimos-with-name.png +0 -0
- data/support/flipp-logo.png +0 -0
- metadata +551 -0
data/Rakefile
ADDED
data/bin/deimos
ADDED
data/deimos-ruby.gemspec
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'deimos/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
|
8
|
+
spec.name = 'deimos-temp-fork'
|
9
|
+
spec.version = Deimos::VERSION
|
10
|
+
spec.authors = ['Daniel Orner']
|
11
|
+
spec.email = ['daniel.orner@wishabi.com']
|
12
|
+
spec.summary = 'Kafka libraries for Ruby.'
|
13
|
+
spec.homepage = ''
|
14
|
+
spec.license = 'Apache-2.0'
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
|
21
|
+
spec.add_runtime_dependency('avro_turf', '~> 0.11')
|
22
|
+
spec.add_runtime_dependency('phobos_temp_fork')
|
23
|
+
spec.add_runtime_dependency('sigurd', '~> 0.0.1')
|
24
|
+
spec.add_runtime_dependency('fig_tree', '~> 0.0.2')
|
25
|
+
|
26
|
+
spec.add_development_dependency('activerecord-import')
|
27
|
+
spec.add_development_dependency('avro', '~> 1.9')
|
28
|
+
spec.add_development_dependency('database_cleaner', '~> 1.7')
|
29
|
+
spec.add_development_dependency('ddtrace', '~> 0.11')
|
30
|
+
spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
|
31
|
+
spec.add_development_dependency('guard', '~> 2')
|
32
|
+
spec.add_development_dependency('guard-rspec', '~> 4')
|
33
|
+
spec.add_development_dependency('guard-rubocop', '~> 1')
|
34
|
+
spec.add_development_dependency('mysql2', '~> 0.5')
|
35
|
+
spec.add_development_dependency('pg', '~> 1.1')
|
36
|
+
spec.add_development_dependency('rails', '~> 6')
|
37
|
+
spec.add_development_dependency('rake', '~> 13')
|
38
|
+
spec.add_development_dependency('rspec', '~> 3')
|
39
|
+
spec.add_development_dependency('rspec_junit_formatter', '~>0.3')
|
40
|
+
spec.add_development_dependency('rspec-rails', '~> 4')
|
41
|
+
spec.add_development_dependency('rubocop', '0.88.0')
|
42
|
+
spec.add_development_dependency('rubocop-rspec', '1.42.0')
|
43
|
+
spec.add_development_dependency('sqlite3', '~> 1.3')
|
44
|
+
end
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
version: '3.6'
|
2
|
+
services:
|
3
|
+
mysql:
|
4
|
+
image: mysql:5.7
|
5
|
+
expose:
|
6
|
+
- 3306
|
7
|
+
environment:
|
8
|
+
- MYSQL_ALLOW_EMPTY_PASSWORD=yes
|
9
|
+
- MYSQL_DATABASE=test
|
10
|
+
- TZ=America/Toronto
|
11
|
+
|
12
|
+
postgres:
|
13
|
+
image: postgres:11.1
|
14
|
+
expose:
|
15
|
+
- 5432
|
16
|
+
environment:
|
17
|
+
POSTGRES_PASSWORD: root
|
18
|
+
|
19
|
+
test:
|
20
|
+
volumes:
|
21
|
+
- .:/var/app
|
22
|
+
depends_on:
|
23
|
+
- kafka-broker
|
24
|
+
- mysql
|
25
|
+
- postgres
|
26
|
+
build: .
|
27
|
+
environment:
|
28
|
+
- "DEFAULT_TIMEOUT=${DEFAULT_TIMEOUT}"
|
29
|
+
- MYSQL_HOST=mysql
|
30
|
+
- PG_HOST=postgres
|
31
|
+
- SCHEMA_REGISTRY=http://schema-registry:8081
|
32
|
+
- KAFKA_SEED_BROKER=kafka-broker:9092
|
33
|
+
command: dockerize -wait tcp://mysql:3306 -wait tcp://postgres:5432 -timeout 1m rspec
|
34
|
+
|
35
|
+
zookeeper:
|
36
|
+
image: wurstmeister/zookeeper:latest
|
37
|
+
ports:
|
38
|
+
- 2181:2181
|
39
|
+
|
40
|
+
schema-registry:
|
41
|
+
image: confluentinc/cp-schema-registry
|
42
|
+
hostname: schema-registry
|
43
|
+
depends_on:
|
44
|
+
- zookeeper
|
45
|
+
- kafka-broker
|
46
|
+
ports:
|
47
|
+
- "8081:8081"
|
48
|
+
environment:
|
49
|
+
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
50
|
+
SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
|
51
|
+
|
52
|
+
kafka-broker:
|
53
|
+
image: confluentinc/cp-enterprise-kafka
|
54
|
+
hostname: kafka-broker
|
55
|
+
depends_on:
|
56
|
+
- zookeeper
|
57
|
+
ports:
|
58
|
+
- "9092:9092"
|
59
|
+
environment:
|
60
|
+
KAFKA_BROKER_ID: 1
|
61
|
+
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
|
62
|
+
KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://kafka-broker:9092'
|
63
|
+
KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter
|
64
|
+
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
65
|
+
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
|
66
|
+
CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-broker:9092
|
67
|
+
CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181
|
68
|
+
CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
|
69
|
+
CONFLUENT_METRICS_ENABLE: 'true'
|
70
|
+
CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'
|
71
|
+
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# Deimos Architecture
|
2
|
+
|
3
|
+
Deimos is the third of three libraries that add functionality on top of each
|
4
|
+
other:
|
5
|
+
|
6
|
+
* [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
|
7
|
+
client, providing APIs for producers, consumers and the client as a whole.
|
8
|
+
* [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
|
9
|
+
of RubyKafka that provides threaded consumers, a simpler way to write
|
10
|
+
producers, and lifecycle management.
|
11
|
+
* [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
|
12
|
+
using Phobos as its base which provides schema integration (e.g. Avro),
|
13
|
+
database integration, metrics, tracing, test helpers and other utilities.
|
14
|
+
|
15
|
+
## Folder structure
|
16
|
+
|
17
|
+
As of May 12, 2020, the following are the important files for understanding how
|
18
|
+
Deimos fits together:
|
19
|
+
* `lib/generators`: Generators to generate database migrations, e.g.
|
20
|
+
for the DB Poller and DB Producer features.
|
21
|
+
* `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
|
22
|
+
* `lib/deimos`: Main Deimos code.
|
23
|
+
* `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
|
24
|
+
some global convenience methods and (for legacy purposes) the way to
|
25
|
+
start the DB Producer.
|
26
|
+
* `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
|
27
|
+
directly to Kafka, use the DB backend, etc.
|
28
|
+
* `lib/deimos/schema_backends`: The different plug-in schema handlers, such
|
29
|
+
as the various flavors of Avro (with/without schema registry etc.)
|
30
|
+
* `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
|
31
|
+
* `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
|
32
|
+
* `lib/deimos/utils`: Utility classes for things not directly related to
|
33
|
+
producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
|
34
|
+
* `lib/deimos/config`: Classes related to configuring Deimos.
|
35
|
+
* `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
|
36
|
+
should be removed in a future update.
|
37
|
+
|
38
|
+
## Features
|
39
|
+
|
40
|
+
### Producers and Consumers
|
41
|
+
|
42
|
+
Both producers and consumers include the `SharedConfig` module, which
|
43
|
+
standardizes configuration like schema settings, topic, keys, etc.
|
44
|
+
|
45
|
+
Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
|
46
|
+
`BaseConsumer` for shared functionality.
|
47
|
+
|
48
|
+
While produced messages go to Kafka by default, literally anything else
|
49
|
+
can happen when your producer calls `produce`, by swapping out the producer
|
50
|
+
_backend_. This is just a file that needs to inherit from `Deimos::Backends::Base`
|
51
|
+
and must implement a single method, `execute`.
|
52
|
+
|
53
|
+
Producers have a complex workflow while processing the payload to publish. This
|
54
|
+
is aided by the `Deimos::Message` class (not to be confused with the
|
55
|
+
`KafkaMessage` class, which is an ActiveRecord used by the DB Producer feature,
|
56
|
+
below).
|
57
|
+
|
58
|
+
### Schemas
|
59
|
+
|
60
|
+
Schema backends are used to encode and decode payloads into different formats
|
61
|
+
such as Avro. These are integrated with producers and consumers, as well
|
62
|
+
as test helpers. These are a bit more involved than producer backends, and
|
63
|
+
must define methods such as:
|
64
|
+
* `encode` a payload or key (when encoding a key, for Avro a key schema
|
65
|
+
may be auto-generated)
|
66
|
+
* `decode` a payload or key
|
67
|
+
* `validate` that a payload is correct for encoding
|
68
|
+
* `coerce` a payload into the given schema (e.g. turn ints into strings)
|
69
|
+
* Get a list of `schema_fields` in the configured schema, used when interacting
|
70
|
+
with ActiveRecord
|
71
|
+
* Define a `mock` backend when the given backend is used. This is used
|
72
|
+
during testing. Typically mock backends will validate values but not
|
73
|
+
actually encode/decode them.
|
74
|
+
|
75
|
+
### Configuration
|
76
|
+
|
77
|
+
Deimos uses the [fig_tree](https://www.github.com/flipp-oss/fig_tree) gem for configuration.
|
78
|
+
|
79
|
+
The configuration definition for Deimos is in `config/configuration.rb`. In
|
80
|
+
addition, there are methods in `config/phobos_config.rb` which translate to/from
|
81
|
+
the Phobos configuration format and support the old `phobos.yml` method
|
82
|
+
of configuration.
|
83
|
+
|
84
|
+
### Metrics and Tracing
|
85
|
+
|
86
|
+
These are simpler than other plugins and must implement the expected methods
|
87
|
+
(`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
|
88
|
+
and `set_error` for tracing). These are used primarily in producers and consumers.
|
89
|
+
|
90
|
+
### ActiveRecord Integration
|
91
|
+
|
92
|
+
Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
|
93
|
+
relatively lightweight ways to save data into a database or read it off
|
94
|
+
the database as part of app logic. They use things like the `coerce` method
|
95
|
+
of the schema backends to manage the differences between the given payload
|
96
|
+
and the configured schema for the topic.
|
97
|
+
|
98
|
+
### Database Backend / Database Producer
|
99
|
+
|
100
|
+
This feature (which provides better performance and transaction guarantees)
|
101
|
+
is powered by two components:
|
102
|
+
* The `db` _publish backend_, which saves messages to the database rather
|
103
|
+
than to Kafka;
|
104
|
+
* The `DbProducer` utility, which runs as a separate process, pulls data
|
105
|
+
from the database and sends it to Kafka.
|
106
|
+
|
107
|
+
There is a set of utility classes that power the producer, which are largely
|
108
|
+
copied from Phobos:
|
109
|
+
* `Executor` takes a set of "runnable" things (which implement a `start` and `stop`
|
110
|
+
method), puts them in a thread pool and runs them all concurrently. It
|
111
|
+
manages starting and stopping all threads when necessary.
|
112
|
+
* `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
|
113
|
+
to stop the executor gracefully.
|
114
|
+
|
115
|
+
In the case of this feature, the `DbProducer` is the runnable object - it
|
116
|
+
can run several threads at once.
|
117
|
+
|
118
|
+
On the database side, the `ActiveRecord` models that power this feature are:
|
119
|
+
* `KafkaMessage`: The actual message, saved to the database. This message
|
120
|
+
is already encoded by the producer, so only has to be sent.
|
121
|
+
* `KafkaTopicInfo`: Used for locking topics so only one producer can work
|
122
|
+
on a given topic at once.
|
123
|
+
|
124
|
+
A Rake task (defined in `deimos.rake`) can be used to start the producer.
|
125
|
+
|
126
|
+
### Database Poller
|
127
|
+
|
128
|
+
This feature (which periodically polls the database to send Kafka messages)
|
129
|
+
primarily uses other aspects of Deimos and hence is relatively small in size.
|
130
|
+
The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
|
131
|
+
The `PollInfo` class is saved to the database to keep track of where each
|
132
|
+
poller is up to.
|
133
|
+
|
134
|
+
A Rake task (defined in `deimos.rake`) can be used to start the pollers.
|
135
|
+
|
136
|
+
### Other Utilities
|
137
|
+
|
138
|
+
The `utils` folder also contains the `LagReporter` (which sends metrics on
|
139
|
+
lag) and the `InlineConsumer`, which can read data from a topic and directly
|
140
|
+
pass it into a handler or save it to memory.
|
@@ -0,0 +1,236 @@
|
|
1
|
+
# Configuration
|
2
|
+
|
3
|
+
Deimos supports a succinct, readable syntax which uses
|
4
|
+
pure Ruby to allow flexible configuration.
|
5
|
+
|
6
|
+
You can access any configuration value via a simple `Deimos.config.whatever`.
|
7
|
+
|
8
|
+
Nested configuration is denoted in simple dot notation:
|
9
|
+
`kafka.ssl.enabled`. Headings below will follow the nested
|
10
|
+
configurations.
|
11
|
+
|
12
|
+
## Base Configuration
|
13
|
+
Config name|Default|Description
|
14
|
+
-----------|-------|-----------
|
15
|
+
logger|`Logger.new(STDOUT)`|The logger that Deimos will use.
|
16
|
+
phobos_logger|`Deimos.config.logger`|The logger passed to Phobos.
|
17
|
+
metrics|`Deimos::Metrics::Mock.new`|The metrics backend used for reporting.
|
18
|
+
tracer|`Deimos::Tracing::Mock.new`|The tracer backend used for debugging.
|
19
|
+
|
20
|
+
## Defining Producers
|
21
|
+
|
22
|
+
You can define a new producer thusly:
|
23
|
+
```ruby
|
24
|
+
Deimos.configure do
|
25
|
+
producer do
|
26
|
+
class_name 'MyProducer'
|
27
|
+
topic 'MyTopic'
|
28
|
+
schema 'MyTopicSchema'
|
29
|
+
namespace 'my.namespace'
|
30
|
+
key_config field: :id
|
31
|
+
|
32
|
+
# If config.schema.path is app/schemas, assumes there is a file in
|
33
|
+
# app/schemas/my/namespace/MyTopicSchema.avsc
|
34
|
+
end
|
35
|
+
end
|
36
|
+
```
|
37
|
+
|
38
|
+
You can have as many `producer` blocks as you like to define more producers.
|
39
|
+
|
40
|
+
Config name|Default|Description
|
41
|
+
-----------|-------|-----------
|
42
|
+
class_name|nil|Class name of the producer class (subclass of `Deimos::Producer`.)
|
43
|
+
topic|nil|Topic to produce to.
|
44
|
+
schema|nil|Name of the schema to use to encode data before producing.
|
45
|
+
namespace|nil|Namespace of the schema to use when finding it locally.
|
46
|
+
key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
|
47
|
+
|
48
|
+
## Defining Consumers
|
49
|
+
|
50
|
+
Consumers are defined almost identically to producers:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
Deimos.configure do
|
54
|
+
consumer do
|
55
|
+
class_name 'MyConsumer'
|
56
|
+
topic 'MyTopic'
|
57
|
+
schema 'MyTopicSchema'
|
58
|
+
namespace 'my.namespace'
|
59
|
+
key_config field: :id
|
60
|
+
|
61
|
+
# Setting to :inline_batch will invoke consume_batch instead of consume
|
62
|
+
# for each batch of messages.
|
63
|
+
delivery :batch
|
64
|
+
|
65
|
+
# If config.schema.path is app/schemas, assumes there is a file in
|
66
|
+
# app/schemas/my/namespace/MyTopicSchema.avsc
|
67
|
+
end
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
In addition to the producer configs, you can define a number of overrides
|
72
|
+
to the basic consumer configuration for each consumer. This is analogous to
|
73
|
+
the `listener` config in `phobos.yml`.
|
74
|
+
|
75
|
+
Config name|Default|Description
|
76
|
+
-----------|-------|-----------
|
77
|
+
class_name|nil|Class name of the consumer class (subclass of `Deimos::Consumer`.)
|
78
|
+
topic|nil|Topic to consume from.
|
79
|
+
schema|nil|This is optional but strongly recommended for testing purposes; this will validate against a local schema file used as the reader schema, as well as being able to write tests against this schema. This is recommended since it ensures you are always getting the values you expect.
|
80
|
+
namespace|nil|Namespace of the schema to use when finding it locally.
|
81
|
+
key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
|
82
|
+
disabled|false|Set to true to skip starting an actual listener for this consumer on startup.
|
83
|
+
group_id|nil|ID of the consumer group.
|
84
|
+
max_concurrency|1|Number of threads created for this listener. Each thread will behave as an independent consumer. They don't share any state.
|
85
|
+
start_from_beginning|true|Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of config. As such, this setting only applies when the consumer initially starts consuming from a topic
|
86
|
+
max_bytes_per_partition|512.kilobytes|Maximum amount of data fetched from a single partition at a time.
|
87
|
+
min_bytes|1|Minimum number of bytes to read before returning messages from the server; if `max_wait_time` is reached, this is ignored.
|
88
|
+
max_wait_time|5|Maximum duration of time to wait before returning messages from the server, in seconds.
|
89
|
+
force_encoding|nil|Apply this encoding to the message payload. If blank it uses the original encoding. This property accepts values defined by the ruby Encoding class (https://ruby-doc.org/core-2.3.0/Encoding.html). Ex: UTF_8, ASCII_8BIT, etc.
|
90
|
+
delivery|`:batch`|The delivery mode for the consumer. Possible values: `:message, :batch, :inline_batch`. See Phobos documentation for more details.
|
91
|
+
session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
|
92
|
+
offset_commit_interval|10|Interval between offset commits, in seconds.
|
93
|
+
offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
|
94
|
+
offset_retention_time|nil|The time period that committed offsets will be retained, in seconds. Defaults to the broker setting.
|
95
|
+
heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
|
96
|
+
backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
|
97
|
+
|
98
|
+
## Defining Database Pollers
|
99
|
+
|
100
|
+
These are used when polling the database via `rake deimos:db_poller`. You
|
101
|
+
can create a number of pollers, one per topic.
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
Deimos.configure do
|
105
|
+
db_poller do
|
106
|
+
producer_class 'MyProducer'
|
107
|
+
run_every 2.minutes
|
108
|
+
end
|
109
|
+
end
|
110
|
+
```
|
111
|
+
|
112
|
+
Config name|Default|Description
|
113
|
+
-----------|-------|-----------
|
114
|
+
producer_class|nil|ActiveRecordProducer class to use for sending messages.
|
115
|
+
run_every|60|Amount of time in seconds to wait between runs.
|
116
|
+
timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
|
117
|
+
delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
|
118
|
+
full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
|
119
|
+
start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
|
120
|
+
|
121
|
+
## Kafka Configuration
|
122
|
+
|
123
|
+
Config name|Default|Description
|
124
|
+
-----------|-------|-----------
|
125
|
+
kafka.logger|`Deimos.config.logger`|Logger passed to RubyKafka.
|
126
|
+
kafka.seed_brokers|`['localhost:9092']`|URL for the Kafka brokers.
|
127
|
+
kafka.client_id|`phobos`|Identifier for this application.
|
128
|
+
kafka.connect_timeout|15|The socket timeout for connecting to the broker, in seconds.
|
129
|
+
kafka.socket_timeout|15|The socket timeout for reading and writing to the broker, in seconds.
|
130
|
+
kafka.ssl.enabled|false|Whether SSL is enabled on the brokers.
|
131
|
+
kafka.ssl.ca_cert|nil| A PEM encoded CA cert, a file path to the cert, or an Array of certs to use with an SSL connection.
|
132
|
+
kafka.ssl.client_cert|nil|A PEM encoded client cert to use with an SSL connection, or a file path to the cert.
|
133
|
+
kafka.ssl.client_cert_key|nil|A PEM encoded client cert key to use with an SSL connection.
|
134
|
+
|
135
|
+
## Consumer Configuration
|
136
|
+
|
137
|
+
These are top-level configuration settings, but they can be overridden
|
138
|
+
by individual consumers.
|
139
|
+
|
140
|
+
Config name|Default|Description
|
141
|
+
-----------|-------|-----------
|
142
|
+
consumers.session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
|
143
|
+
consumers.offset_commit_interval|10|Interval between offset commits, in seconds.
|
144
|
+
consumers.offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
|
145
|
+
consumers.heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
|
146
|
+
consumers.backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
|
147
|
+
consumers.reraise_errors|false|Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. This is automatically set to true when using the `TestHelpers` module in RSpec.
|
148
|
+
consumers.report_lag|false|Whether to send the `consumer_lag` metric. This requires an extra thread per consumer.
|
149
|
+
consumers.fatal_error|`proc { false }`|Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is unavailable. Not needed if reraise_errors is set to true.
|
150
|
+
|
151
|
+
## Producer Configuration
|
152
|
+
|
153
|
+
Config name|Default|Description
|
154
|
+
-----------|-------|-----------
|
155
|
+
producers.ack_timeout|5|Number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout.
|
156
|
+
producers.required_acks|1|Number of replicas that must acknowledge a write, or `:all` if all in-sync replicas must acknowledge.
|
157
|
+
producers.max_retries|2|Number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt.
|
158
|
+
producers.retry_backoff|1|Number of seconds to wait between retries.
|
159
|
+
producers.max_buffer_size|10_000|Number of messages allowed in the buffer before new writes will raise `BufferOverflow` exceptions.
|
160
|
+
producers.max_buffer_bytesize|10_000_000|Maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in `BufferOverflow` being raised.
|
161
|
+
producers.compression_codec|nil|Name of the compression codec to use, or nil if no compression should be performed. Valid codecs: `:snappy` and `:gzip`
|
162
|
+
producers.compression_threshold|1|Number of messages that need to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer.
|
163
|
+
producers.max_queue_size|10_000|Maximum number of messages allowed in the queue. Only used for async_producer.
|
164
|
+
producers.delivery_threshold|0|If greater than zero, the number of buffered messages that will automatically trigger a delivery. Only used for async_producer.
|
165
|
+
producers.delivery_interval|0|If greater than zero, the number of seconds between automatic message deliveries. Only used for async_producer.
|
166
|
+
producers.persistent_connections|false|Set this to true to keep the producer connection between publish calls. This can speed up subsequent messages by around 30%, but it does mean that you need to manually call sync_producer_shutdown before exiting, similar to async_producer_shutdown.
|
167
|
+
producers.schema_namespace|nil|Default namespace for all producers. Can remain nil. Individual producers can override.
|
168
|
+
producers.topic_prefix|nil|Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics.
|
169
|
+
producers.disabled|false|Disable all actual message producing. Generally more useful to use the `disable_producers` method instead.
|
170
|
+
producers.backend|`:kafka_async`|Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers.
|
171
|
+
|
172
|
+
## Schema Configuration
|
173
|
+
|
174
|
+
Config name|Default|Description
|
175
|
+
-----------|-------|-----------
|
176
|
+
schema.backend|`:mock`|Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends).
|
177
|
+
schema.registry_url|`http://localhost:8081`|URL of the Confluent schema registry.
|
178
|
+
schema.path|nil|Local path to find your schemas.
|
179
|
+
|
180
|
+
## Database Producer Configuration
|
181
|
+
|
182
|
+
Config name|Default|Description
|
183
|
+
-----------|-------|-----------
|
184
|
+
db_producer.logger|`Deimos.config.logger`|Logger to use inside the DB producer.
|
185
|
+
db_producer.log_topics|`[]`|List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry.
|
186
|
+
db_producer.compact_topics|`[]`|List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics.
|
187
|
+
|
188
|
+
## Configuration Syntax
|
189
|
+
|
190
|
+
Sample:
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
Deimos.configure do
|
194
|
+
logger Logger.new(STDOUT)
|
195
|
+
# Nested config field
|
196
|
+
kafka.seed_brokers ['my.kafka.broker:9092']
|
197
|
+
|
198
|
+
# Multiple nested config fields via block
|
199
|
+
consumers do
|
200
|
+
session_timeout 30
|
201
|
+
offset_commit_interval 10
|
202
|
+
end
|
203
|
+
|
204
|
+
# Define a new producer
|
205
|
+
producer do
|
206
|
+
class_name 'MyProducer'
|
207
|
+
topic 'MyTopic'
|
208
|
+
schema 'MyTopicSchema'
|
209
|
+
key_config field: :id
|
210
|
+
end
|
211
|
+
|
212
|
+
# Define another new producer
|
213
|
+
producer do
|
214
|
+
class_name 'AnotherProducer'
|
215
|
+
topic 'AnotherTopic'
|
216
|
+
schema 'AnotherSchema'
|
217
|
+
key_config plain: true
|
218
|
+
end
|
219
|
+
|
220
|
+
# Define a consumer
|
221
|
+
consumer do
|
222
|
+
class_name 'MyConsumer'
|
223
|
+
topic 'TopicToConsume'
|
224
|
+
schema 'ConsumerSchema'
|
225
|
+
key_config plain: true
|
226
|
+
# include Phobos / RubyKafka configs
|
227
|
+
start_from_beginning true
|
228
|
+
heartbeat_interval 10
|
229
|
+
end
|
230
|
+
|
231
|
+
end
|
232
|
+
```
|
233
|
+
|
234
|
+
Note that all blocks are evaluated in the context of the configuration object.
|
235
|
+
If you're calling this inside another class or method, you'll need to save
|
236
|
+
things you need to reference into local variables before calling `configure`.
|