deimos-temp-fork 0.0.1

Files changed (146)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +83 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +333 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +349 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +286 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +1099 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-ruby.gemspec +44 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/ARCHITECTURE.md +140 -0
  22. data/docs/CONFIGURATION.md +236 -0
  23. data/docs/DATABASE_BACKEND.md +147 -0
  24. data/docs/INTEGRATION_TESTS.md +52 -0
  25. data/docs/PULL_REQUEST_TEMPLATE.md +35 -0
  26. data/docs/UPGRADING.md +128 -0
  27. data/lib/deimos-temp-fork.rb +95 -0
  28. data/lib/deimos/active_record_consume/batch_consumption.rb +164 -0
  29. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  30. data/lib/deimos/active_record_consume/message_consumption.rb +79 -0
  31. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  32. data/lib/deimos/active_record_consumer.rb +67 -0
  33. data/lib/deimos/active_record_producer.rb +87 -0
  34. data/lib/deimos/backends/base.rb +32 -0
  35. data/lib/deimos/backends/db.rb +41 -0
  36. data/lib/deimos/backends/kafka.rb +33 -0
  37. data/lib/deimos/backends/kafka_async.rb +33 -0
  38. data/lib/deimos/backends/test.rb +20 -0
  39. data/lib/deimos/batch_consumer.rb +7 -0
  40. data/lib/deimos/config/configuration.rb +381 -0
  41. data/lib/deimos/config/phobos_config.rb +137 -0
  42. data/lib/deimos/consume/batch_consumption.rb +150 -0
  43. data/lib/deimos/consume/message_consumption.rb +94 -0
  44. data/lib/deimos/consumer.rb +104 -0
  45. data/lib/deimos/instrumentation.rb +76 -0
  46. data/lib/deimos/kafka_message.rb +60 -0
  47. data/lib/deimos/kafka_source.rb +128 -0
  48. data/lib/deimos/kafka_topic_info.rb +102 -0
  49. data/lib/deimos/message.rb +79 -0
  50. data/lib/deimos/metrics/datadog.rb +47 -0
  51. data/lib/deimos/metrics/mock.rb +39 -0
  52. data/lib/deimos/metrics/provider.rb +36 -0
  53. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  54. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  55. data/lib/deimos/poll_info.rb +9 -0
  56. data/lib/deimos/producer.rb +224 -0
  57. data/lib/deimos/railtie.rb +8 -0
  58. data/lib/deimos/schema_backends/avro_base.rb +140 -0
  59. data/lib/deimos/schema_backends/avro_local.rb +30 -0
  60. data/lib/deimos/schema_backends/avro_schema_coercer.rb +119 -0
  61. data/lib/deimos/schema_backends/avro_schema_registry.rb +34 -0
  62. data/lib/deimos/schema_backends/avro_validation.rb +21 -0
  63. data/lib/deimos/schema_backends/base.rb +150 -0
  64. data/lib/deimos/schema_backends/mock.rb +42 -0
  65. data/lib/deimos/shared_config.rb +63 -0
  66. data/lib/deimos/test_helpers.rb +360 -0
  67. data/lib/deimos/tracing/datadog.rb +35 -0
  68. data/lib/deimos/tracing/mock.rb +40 -0
  69. data/lib/deimos/tracing/provider.rb +29 -0
  70. data/lib/deimos/utils/db_poller.rb +150 -0
  71. data/lib/deimos/utils/db_producer.rb +243 -0
  72. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  73. data/lib/deimos/utils/inline_consumer.rb +150 -0
  74. data/lib/deimos/utils/lag_reporter.rb +175 -0
  75. data/lib/deimos/utils/schema_controller_mixin.rb +115 -0
  76. data/lib/deimos/version.rb +5 -0
  77. data/lib/generators/deimos/active_record/templates/migration.rb.tt +28 -0
  78. data/lib/generators/deimos/active_record/templates/model.rb.tt +5 -0
  79. data/lib/generators/deimos/active_record_generator.rb +79 -0
  80. data/lib/generators/deimos/db_backend/templates/migration +25 -0
  81. data/lib/generators/deimos/db_backend/templates/rails3_migration +31 -0
  82. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  83. data/lib/generators/deimos/db_poller/templates/migration +11 -0
  84. data/lib/generators/deimos/db_poller/templates/rails3_migration +16 -0
  85. data/lib/generators/deimos/db_poller_generator.rb +48 -0
  86. data/lib/tasks/deimos.rake +34 -0
  87. data/spec/active_record_batch_consumer_spec.rb +481 -0
  88. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  89. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  90. data/spec/active_record_consumer_spec.rb +154 -0
  91. data/spec/active_record_producer_spec.rb +85 -0
  92. data/spec/backends/base_spec.rb +10 -0
  93. data/spec/backends/db_spec.rb +54 -0
  94. data/spec/backends/kafka_async_spec.rb +11 -0
  95. data/spec/backends/kafka_spec.rb +11 -0
  96. data/spec/batch_consumer_spec.rb +256 -0
  97. data/spec/config/configuration_spec.rb +248 -0
  98. data/spec/consumer_spec.rb +209 -0
  99. data/spec/deimos_spec.rb +169 -0
  100. data/spec/generators/active_record_generator_spec.rb +56 -0
  101. data/spec/handlers/my_batch_consumer.rb +10 -0
  102. data/spec/handlers/my_consumer.rb +10 -0
  103. data/spec/kafka_listener_spec.rb +55 -0
  104. data/spec/kafka_source_spec.rb +381 -0
  105. data/spec/kafka_topic_info_spec.rb +111 -0
  106. data/spec/message_spec.rb +19 -0
  107. data/spec/phobos.bad_db.yml +73 -0
  108. data/spec/phobos.yml +77 -0
  109. data/spec/producer_spec.rb +498 -0
  110. data/spec/rake_spec.rb +19 -0
  111. data/spec/schema_backends/avro_base_shared.rb +199 -0
  112. data/spec/schema_backends/avro_local_spec.rb +32 -0
  113. data/spec/schema_backends/avro_schema_registry_spec.rb +32 -0
  114. data/spec/schema_backends/avro_validation_spec.rb +24 -0
  115. data/spec/schema_backends/base_spec.rb +33 -0
  116. data/spec/schemas/com/my-namespace/Generated.avsc +71 -0
  117. data/spec/schemas/com/my-namespace/MyNestedSchema.avsc +62 -0
  118. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  119. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  120. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  121. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  122. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  123. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  124. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  125. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  126. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  127. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  128. data/spec/schemas/com/my-namespace/request/CreateTopic.avsc +11 -0
  129. data/spec/schemas/com/my-namespace/request/Index.avsc +11 -0
  130. data/spec/schemas/com/my-namespace/request/UpdateRequest.avsc +11 -0
  131. data/spec/schemas/com/my-namespace/response/CreateTopic.avsc +11 -0
  132. data/spec/schemas/com/my-namespace/response/Index.avsc +11 -0
  133. data/spec/schemas/com/my-namespace/response/UpdateResponse.avsc +11 -0
  134. data/spec/spec_helper.rb +267 -0
  135. data/spec/utils/db_poller_spec.rb +320 -0
  136. data/spec/utils/db_producer_spec.rb +514 -0
  137. data/spec/utils/deadlock_retry_spec.rb +74 -0
  138. data/spec/utils/inline_consumer_spec.rb +31 -0
  139. data/spec/utils/lag_reporter_spec.rb +76 -0
  140. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  141. data/spec/utils/schema_controller_mixin_spec.rb +84 -0
  142. data/support/deimos-solo.png +0 -0
  143. data/support/deimos-with-name-next.png +0 -0
  144. data/support/deimos-with-name.png +0 -0
  145. data/support/flipp-logo.png +0 -0
  146. metadata +551 -0
data/Rakefile ADDED
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+
+ require 'bundler/gem_tasks'
+ begin
+   require 'rspec/core/rake_task'
+
+   RSpec::Core::RakeTask.new(:spec)
+   task(default: :spec)
+ rescue LoadError
+   # no rspec available
+ end
+
+ import('./lib/tasks/deimos.rake')
data/bin/deimos ADDED
@@ -0,0 +1,4 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require 'deimos'
data/deimos-ruby.gemspec ADDED
@@ -0,0 +1,44 @@
+ # frozen_string_literal: true
+
+ lib = File.expand_path('lib', __dir__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'deimos/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = 'deimos-temp-fork'
+   spec.version       = Deimos::VERSION
+   spec.authors       = ['Daniel Orner']
+   spec.email         = ['daniel.orner@wishabi.com']
+   spec.summary       = 'Kafka libraries for Ruby.'
+   spec.homepage      = ''
+   spec.license       = 'Apache-2.0'
+
+   spec.files         = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ['lib']
+
+   spec.add_runtime_dependency('avro_turf', '~> 0.11')
+   spec.add_runtime_dependency('phobos_temp_fork')
+   spec.add_runtime_dependency('sigurd', '~> 0.0.1')
+   spec.add_runtime_dependency('fig_tree', '~> 0.0.2')
+
+   spec.add_development_dependency('activerecord-import')
+   spec.add_development_dependency('avro', '~> 1.9')
+   spec.add_development_dependency('database_cleaner', '~> 1.7')
+   spec.add_development_dependency('ddtrace', '~> 0.11')
+   spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
+   spec.add_development_dependency('guard', '~> 2')
+   spec.add_development_dependency('guard-rspec', '~> 4')
+   spec.add_development_dependency('guard-rubocop', '~> 1')
+   spec.add_development_dependency('mysql2', '~> 0.5')
+   spec.add_development_dependency('pg', '~> 1.1')
+   spec.add_development_dependency('rails', '~> 6')
+   spec.add_development_dependency('rake', '~> 13')
+   spec.add_development_dependency('rspec', '~> 3')
+   spec.add_development_dependency('rspec_junit_formatter', '~> 0.3')
+   spec.add_development_dependency('rspec-rails', '~> 4')
+   spec.add_development_dependency('rubocop', '0.88.0')
+   spec.add_development_dependency('rubocop-rspec', '1.42.0')
+   spec.add_development_dependency('sqlite3', '~> 1.3')
+ end
data/docker-compose.yml ADDED
@@ -0,0 +1,71 @@
+ version: '3.6'
+ services:
+   mysql:
+     image: mysql:5.7
+     expose:
+       - 3306
+     environment:
+       - MYSQL_ALLOW_EMPTY_PASSWORD=yes
+       - MYSQL_DATABASE=test
+       - TZ=America/Toronto
+
+   postgres:
+     image: postgres:11.1
+     expose:
+       - 5432
+     environment:
+       POSTGRES_PASSWORD: root
+
+   test:
+     volumes:
+       - .:/var/app
+     depends_on:
+       - kafka-broker
+       - mysql
+       - postgres
+     build: .
+     environment:
+       - "DEFAULT_TIMEOUT=${DEFAULT_TIMEOUT}"
+       - MYSQL_HOST=mysql
+       - PG_HOST=postgres
+       - SCHEMA_REGISTRY=http://schema-registry:8081
+       - KAFKA_SEED_BROKER=kafka-broker:9092
+     command: dockerize -wait tcp://mysql:3306 -wait tcp://postgres:5432 -timeout 1m rspec
+
+   zookeeper:
+     image: wurstmeister/zookeeper:latest
+     ports:
+       - 2181:2181
+
+   schema-registry:
+     image: confluentinc/cp-schema-registry
+     hostname: schema-registry
+     depends_on:
+       - zookeeper
+       - kafka-broker
+     ports:
+       - "8081:8081"
+     environment:
+       SCHEMA_REGISTRY_HOST_NAME: schema-registry
+       SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
+
+   kafka-broker:
+     image: confluentinc/cp-enterprise-kafka
+     hostname: kafka-broker
+     depends_on:
+       - zookeeper
+     ports:
+       - "9092:9092"
+     environment:
+       KAFKA_BROKER_ID: 1
+       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+       KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://kafka-broker:9092'
+       KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter
+       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+       CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-broker:9092
+       CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181
+       CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
+       CONFLUENT_METRICS_ENABLE: 'true'
+       CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'
+
data/docs/ARCHITECTURE.md ADDED
@@ -0,0 +1,140 @@
+ # Deimos Architecture
+
+ Deimos is the third of three libraries that add functionality on top of each
+ other:
+
+ * [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
+   client, providing APIs for producers, consumers and the client as a whole.
+ * [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
+   of RubyKafka that provides threaded consumers, a simpler way to write
+   producers, and lifecycle management.
+ * [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
+   using Phobos as its base, which provides schema integration (e.g. Avro),
+   database integration, metrics, tracing, test helpers and other utilities.
+
+ ## Folder structure
+
+ As of May 12, 2020, these are the important files for understanding how
+ Deimos fits together:
+ * `lib/generators`: Generators for database migrations, e.g.
+   for the DB Poller and DB Producer features.
+ * `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
+ * `lib/deimos`: Main Deimos code.
+ * `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
+   some global convenience methods and (for legacy purposes) the way to
+   start the DB Producer.
+ * `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
+   directly to Kafka, use the DB backend, etc.
+ * `lib/deimos/schema_backends`: The different plug-in schema handlers, such
+   as the various flavors of Avro (with/without schema registry, etc.).
+ * `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
+ * `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
+ * `lib/deimos/utils`: Utility classes for things not directly related to
+   producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
+ * `lib/deimos/config`: Classes related to configuring Deimos.
+ * `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
+   should be removed in a future update.
+
+ ## Features
+
+ ### Producers and Consumers
+
+ Both producers and consumers include the `SharedConfig` module, which
+ standardizes configuration like schema settings, topic, keys, etc.
+
+ Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
+ `BaseConsumer` for shared functionality.
+
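As a minimal sketch (the `MyConsumer` class and `Widget` model are hypothetical, used purely for illustration), a message-mode consumer implements `consume`, while a batch consumer implements `consume_batch`:

```ruby
# Sketch of a message-by-message consumer. `Widget` is a hypothetical
# ActiveRecord model.
class MyConsumer < Deimos::Consumer
  def consume(payload, metadata)
    # payload is the decoded message; metadata includes the key, topic, etc.
    Widget.create!(id: metadata[:key], name: payload['name'])
  end
end
```
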
+ While produced messages go to Kafka by default, swapping out the producer
+ _backend_ allows literally anything else to happen when your producer calls
+ `produce`. A backend is simply a class that inherits from
+ `Deimos::Backends::Base` and implements a single method, `execute`.
+
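For instance, a custom backend that logs instead of producing might look like this (a sketch only; the `LoggingBackend` class is hypothetical and the `execute` keyword arguments are an assumption based on the built-in backends):

```ruby
# Hypothetical backend: logs messages instead of sending them to Kafka.
# Assumes `execute` receives the producer class and the batch of messages,
# as the built-in backends do.
class LoggingBackend < Deimos::Backends::Base
  def self.execute(producer_class:, messages:)
    messages.each do |message|
      Deimos.config.logger.info("Would produce via #{producer_class}: #{message.inspect}")
    end
  end
end
```
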
+ Producers have a complex workflow while processing the payload to publish. This
+ is aided by the `Deimos::Message` class (not to be confused with the
+ `KafkaMessage` class, which is an ActiveRecord model used by the DB Producer
+ feature, below).
+
+ ### Schemas
+
+ Schema backends are used to encode and decode payloads into different formats
+ such as Avro. These are integrated with producers and consumers, as well
+ as test helpers. They are a bit more involved than producer backends, and
+ must define methods such as the following (a sketch appears after this list):
+ * `encode` a payload or key (when encoding a key, for Avro a key schema
+   may be auto-generated)
+ * `decode` a payload or key
+ * `validate` that a payload is correct for encoding
+ * `coerce` a payload into the given schema (e.g. turn ints into strings)
+ * Get a list of `schema_fields` in the configured schema, used when interacting
+   with ActiveRecord
+ * Define a `mock` backend to be used when the given backend is used. This is
+   used during testing. Typically mock backends will validate values but not
+   actually encode/decode them.
+
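A hypothetical plain-JSON backend illustrates the shape of these classes. The method names follow the abstract methods in `schema_backends/base.rb`, but the exact signatures shown here are assumptions and may differ between versions:

```ruby
require 'json'

# Sketch only: a schema backend that round-trips JSON. Signatures are
# assumed, not verified against base.rb.
class JsonBackend < Deimos::SchemaBackends::Base
  def encode_payload(payload, schema:, topic: nil)
    payload.to_json
  end

  def decode_payload(payload, schema:)
    JSON.parse(payload)
  end

  def validate(payload, schema:)
    true # a real backend would verify the payload against the schema
  end

  def schema_fields
    [] # a real backend would derive these from the schema definition
  end
end
```
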
+ ### Configuration
+
+ Deimos uses the [fig_tree](https://github.com/flipp-oss/fig_tree) gem for configuration.
+
+ The configuration definition for Deimos is in `config/configuration.rb`. In
+ addition, there are methods in `config/phobos_config.rb` which translate to/from
+ the Phobos configuration format and support the old `phobos.yml` method
+ of configuration.
+
+ ### Metrics and Tracing
+
+ These are simpler than other plugins and must implement the expected methods
+ (`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
+ and `set_error` for tracing). These are used primarily in producers and consumers.
+
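A custom metrics provider might look roughly like this (a sketch assuming the method names above; the option hashes and `LogMetrics` class are assumptions):

```ruby
# Hypothetical metrics provider that forwards everything to the logger.
class LogMetrics < Deimos::Metrics::Provider
  def increment(metric_name, options = {})
    Deimos.config.logger.info("metric #{metric_name} +1 #{options.inspect}")
  end

  def gauge(metric_name, count, options = {})
    Deimos.config.logger.info("gauge #{metric_name}=#{count}")
  end

  def histogram(metric_name, count, options = {})
    Deimos.config.logger.info("histogram #{metric_name}=#{count}")
  end

  def time(metric_name, options = {})
    start = Time.now
    yield
    Deimos.config.logger.info("timed #{metric_name}: #{Time.now - start}s")
  end
end
```
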
+ ### ActiveRecord Integration
+
+ Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
+ relatively lightweight ways to save data into a database or read it out of
+ the database as part of app logic. They use things like the `coerce` method
+ of the schema backends to manage the differences between the given payload
+ and the configured schema for the topic.
+
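A minimal sketch (the `Widget` model is hypothetical; `record_class` is the hook for naming the model to write to):

```ruby
# Sketch: save each consumed message into the widgets table, with schema
# fields coerced to the model's column types.
class WidgetConsumer < Deimos::ActiveRecordConsumer
  record_class Widget
end
```
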
+ ### Database Backend / Database Producer
+
+ This feature (which provides better performance and transaction guarantees)
+ is powered by two components:
+ * The `db` _publish backend_, which saves messages to the database rather
+   than to Kafka;
+ * The `DbProducer` utility, which runs as a separate process, pulling data
+   from the database and sending it to Kafka.
+
+ A set of utility classes, largely copied from Phobos, powers the producer:
+ * `Executor` takes a set of "runnable" things (which implement `start` and
+   `stop` methods), puts them in a thread pool, and runs them all concurrently.
+   It manages starting and stopping all threads when necessary.
+ * `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
+   to stop the executor gracefully.
+
+ In the case of this feature, the `DbProducer` is the runnable object - it
+ can run several threads at once.
+
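Wiring these pieces together looks roughly like this (a sketch only; the Sigurd constructor arguments shown are assumptions, not a verified API):

```ruby
# Sketch: run a DbProducer under an Executor with graceful signal handling.
producer = Deimos::Utils::DbProducer.new
executor = Sigurd::Executor.new([producer], logger: Deimos.config.logger)
Sigurd::SignalHandler.new(executor).run!
```
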
+ On the database side, the `ActiveRecord` models that power this feature are:
+ * `KafkaMessage`: The actual message, saved to the database. This message
+   is already encoded by the producer, so it only needs to be sent.
+ * `KafkaTopicInfo`: Used for locking topics so that only one producer can work
+   on a topic at once.
+
+ A Rake task (defined in `deimos.rake`) can be used to start the producer.
+
+ ### Database Poller
+
+ This feature (which periodically polls the database to send Kafka messages)
+ primarily uses other aspects of Deimos and hence is relatively small in size.
+ The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
+ The `PollInfo` class is saved to the database to track how far each
+ poller has progressed.
+
+ A Rake task (defined in `deimos.rake`) can be used to start the pollers.
+
+ ### Other Utilities
+
+ The `utils` folder also contains the `LagReporter` (which sends metrics on
+ lag) and the `InlineConsumer`, which can read data from a topic and directly
+ pass it into a handler or save it to memory.
data/docs/CONFIGURATION.md ADDED
@@ -0,0 +1,236 @@
+ # Configuration
+
+ Deimos supports a succinct, readable syntax which uses
+ pure Ruby to allow flexible configuration.
+
+ You can access any configuration value via a simple `Deimos.config.whatever`.
+
+ Nested configuration is denoted in simple dot notation:
+ `kafka.ssl.enabled`. Headings below follow the nested
+ configuration names.
+
+ ## Base Configuration
13
+ Config name|Default|Description
14
+ -----------|-------|-----------
15
+ logger|`Logger.new(STDOUT)`|The logger that Deimos will use.
16
+ phobos_logger|`Deimos.config.logger`|The logger passed to Phobos.
17
+ metrics|`Deimos::Metrics::Mock.new`|The metrics backend use for reporting.
18
+ tracer|`Deimos::Tracer::Mock.new`|The tracer backend used for debugging.
19
+
20
+ ## Defining Producers
21
+
22
+ You can define a new producer thusly:
23
+ ```ruby
24
+ Deimos.configure do
25
+ producer do
26
+ class_name 'MyProducer'
27
+ topic 'MyTopic'
28
+ schema 'MyTopicSchema'
29
+ namespace 'my.namespace'
30
+ key_config field: :id
31
+
32
+ # If config.schema.path is app/schemas, assumes there is a file in
33
+ # app/schemas/my/namespace/MyTopicSchema.avsc
34
+ end
35
+ end
36
+ ```
37
+
38
+ You can have as many `producer` blocks as you like to define more producers.
39
+
40
+ Config name|Default|Description
41
+ -----------|-------|-----------
42
+ class_name|nil|Class name of the producer class (subclass of `Deimos::Producer`.)
43
+ topic|nil|Topic to produce to.
44
+ schema|nil|Name of the schema to use to encode data before producing.
45
+ namespace|nil|Namespace of the schema to use when finding it locally.
46
+ key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
47
+
48
+ ## Defining Consumers
49
+
50
+ Consumers are defined almost identically to producers:
51
+
52
+ ```ruby
53
+ Deimos.configure do
54
+ consumer do
55
+ class_name 'MyConsumer'
56
+ topic 'MyTopic'
57
+ schema 'MyTopicSchema'
58
+ namespace 'my.namespace'
59
+ key_config field: :id
60
+
61
+ # Setting to :inline_batch will invoke consume_batch instead of consume
62
+ # for each batch of messages.
63
+ delivery :batch
64
+
65
+ # If config.schema.path is app/schemas, assumes there is a file in
66
+ # app/schemas/my/namespace/MyTopicSchema.avsc
67
+ end
68
+ end
69
+ ```
70
+
71
+ In addition to the producer configs, you can define a number of overrides
72
+ to the basic consumer configuration for each consumer. This is analogous to
73
+ the `listener` config in `phobos.yml`.
74
+
75
+ Config name|Default|Description
76
+ -----------|-------|-----------
77
+ class_name|nil|Class name of the consumer class (subclass of `Deimos::Consumer`.)
78
+ topic|nil|Topic to produce to.
79
+ schema|nil|This is optional but strongly recommended for testing purposes; this will validate against a local schema file used as the reader schema, as well as being able to write tests against this schema. This is recommended since it ensures you are always getting the values you expect.
80
+ namespace|nil|Namespace of the schema to use when finding it locally.
81
+ key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
82
+ disabled|false|Set to true to skip starting an actual listener for this consumer on startup.
83
+ group_id|nil|ID of the consumer group.
84
+ max_concurrency|1|Number of threads created for this listener. Each thread will behave as an independent consumer. They don't share any state.
85
+ start_from_beginning|true|Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of config. As such, this setting only applies when the consumer initially starts consuming from a topic
86
+ max_bytes_per_partition|512.kilobytes|Maximum amount of data fetched from a single partition at a time.
87
+ min_bytes|1|Minimum number of bytes to read before returning messages from the server; if `max_wait_time` is reached, this is ignored.
88
+ max_wait_time|5|Maximum duration of time to wait before returning messages from the server, in seconds.
89
+ force_encoding|nil|Apply this encoding to the message payload. If blank it uses the original encoding. This property accepts values defined by the ruby Encoding class (https://ruby-doc.org/core-2.3.0/Encoding.html). Ex: UTF_8, ASCII_8BIT, etc.
90
+ delivery|`:batch`|The delivery mode for the consumer. Possible values: `:message, :batch, :inline_batch`. See Phobos documentation for more details.
91
+ session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
92
+ offset_commit_interval|10|Interval between offset commits, in seconds.
93
+ offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
94
+ offset_retention_time|nil|The time period that committed offsets will be retained, in seconds. Defaults to the broker setting.
95
+ heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
96
+ backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
97
+
98
+ ## Defining Database Pollers
99
+
100
+ These are used when polling the database via `rake deimos:db_poller`. You
101
+ can create a number of pollers, one per topic.
102
+
103
+ ```ruby
104
+ Deimos.configure do
105
+ db_poller do
106
+ producer_class 'MyProducer'
107
+ run_every 2.minutes
108
+ end
109
+ end
110
+ ```
111
+
112
+ Config name|Default|Description
113
+ -----------|-------|-----------
114
+ producer_class|nil|ActiveRecordProducer class to use for sending messages.
115
+ run_every|60|Amount of time in seconds to wait between runs.
116
+ timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
117
+ delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
118
+ full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables.
119
+ start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller.
120
+
121
+ ## Kafka Configuration
122
+
123
+ Config name|Default|Description
124
+ -----------|-------|-----------
125
+ kafka.logger|`Deimos.config.logger`|Logger passed to RubyKafka.
126
+ kafka.seed_brokers|`['localhost:9092']`|URL for the Kafka brokers.
127
+ kafka.client_id|`phobos`|Identifier for this application.
128
+ kafka.connect_timeout|15|The socket timeout for connecting to the broker, in seconds.
129
+ kafka.socket_timeout|15|The socket timeout for reading and writing to the broker, in seconds.
130
+ kafka.ssl.enabled|false|Whether SSL is enabled on the brokers.
131
+ kafka.ssl.ca_cert|nil| A PEM encoded CA cert, a file path to the cert, or an Array of certs to use with an SSL connection.
132
+ kafka.ssl.client_cert|nil|A PEM encoded client cert to use with an SSL connection, or a file path to the cert.
133
+ kafka.ssl.client_cert_key|nil|A PEM encoded client cert key to use with an SSL connection.
134
+
135
+ ## Consumer Configuration
136
+
137
+ These are top-level configuration settings, but they can be overridden
138
+ by individual consumers.
139
+
140
+ Config name|Default|Description
141
+ -----------|-------|-----------
142
+ consumers.session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
143
+ consumers.offset_commit_interval|10|Interval between offset commits, in seconds.
144
+ consumers.offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
145
+ consumers.heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
146
+ consumers.backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
147
+ consumers.reraise_errors|false|Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. This is automatically set to true when using the `TestHelpers` module in RSpec.
148
+ consumers.report_lag|false|Whether to send the `consumer_lag` metric. This requires an extra thread per consumer.
149
+ consumers.fatal_error|`proc { false }`|Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is available. Not needed if reraise_errors is set to true.
150
+
151
+ ## Producer Configuration
152
+
153
+ Config name|Default|Description
154
+ -----------|-------|-----------
155
+ producers.ack_timeout|5|Number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout.
156
+ producers.required_acks|1|Number of replicas that must acknowledge a write, or `:all` if all in-sync replicas must acknowledge.
157
+ producers.max_retries|2|Number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt.
158
+ producers.retry_backoff|1|Number of seconds to wait between retries.
159
+ producers.max_buffer_size|10_000|Number of messages allowed in the buffer before new writes will raise `BufferOverflow` exceptions.
160
+ producers.max_buffer_bytesize|10_000_000|Maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in `BufferOverflow` being raised.
161
+ producers.compression_codec|nil|Name of the compression codec to use, or nil if no compression should be performed. Valid codecs: `:snappy` and `:gzip`
162
+ producers.compression_threshold|1|Number of messages that needs to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer.
163
+ producers.max_queue_size|10_000|Maximum number of messages allowed in the queue. Only used for async_producer.
164
+ producers.delivery_threshold|0|If greater than zero, the number of buffered messages that will automatically trigger a delivery. Only used for async_producer.
165
+ producers.delivery_interval|0|if greater than zero, the number of seconds between automatic message deliveries. Only used for async_producer.
166
+ producers.persistent_connections|false|Set this to true to keep the producer connection between publish calls. This can speed up subsequent messages by around 30%, but it does mean that you need to manually call sync_producer_shutdown before exiting, similar to async_producer_shutdown.
167
+ producers.schema_namespace|nil|Default namespace for all producers. Can remain nil. Individual producers can override.
168
+ producers.topic_prefix|nil|Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics.
169
+ producers.disabled|false|Disable all actual message producing. Generally more useful to use the `disable_producers` method instead.
170
+ producers.backend|`:kafka_async`|Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers.
171
+
172
+ ## Schema Configuration
173
+
174
+ Config name|Default|Description
175
+ -----------|-------|-----------
176
+ schema.backend|`:mock`|Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends).
177
+ schema.registry_url|`http://localhost:8081`|URL of the Confluent schema registry.
178
+ schema.path|nil|Local path to find your schemas.
179
+
180
+ ## Database Producer Configuration
181
+
182
+ Config name|Default|Description
183
+ -----------|-------|-----------
184
+ db_producer.logger|`Deimos.config.logger`|Logger to use inside the DB producer.
185
+ db_producer.log_topics|`[]`|List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry.
186
+ db_producer.compact_topics|`[]`|List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics.
187
+
188
+ ## Configuration Syntax
189
+
190
+ Sample:
191
+
192
+ ```ruby
193
+ Deimos.configure do
194
+ logger Logger.new(STDOUT)
195
+ # Nested config field
196
+ kafka.seed_brokers ['my.kafka.broker:9092']
197
+
198
+ # Multiple nested config fields via block
199
+ consumers do
200
+ session_timeout 30
201
+ offset_commit_interval 10
202
+ end
203
+
204
+ # Define a new producer
205
+ producer do
206
+ class_name 'MyProducer'
207
+ topic 'MyTopic'
208
+ schema 'MyTopicSchema'
209
+ key_config field: :id
210
+ end
211
+
212
+ # Define another new producer
213
+ producer do
214
+ class_name 'AnotherProducer'
215
+ topic 'AnotherTopic'
216
+ schema 'AnotherSchema'
217
+ key_config plain: true
218
+ end
219
+
220
+ # Define a consumer
221
+ consumer do
222
+ class_name 'MyConsumer'
223
+ topic 'TopicToConsume'
224
+ schema 'ConsumerSchema'
225
+ key_config plain: true
226
+ # include Phobos / RubyKafka configs
227
+ start_from_beginning true
228
+ heartbeat_interval 10
229
+ end
230
+
231
+ end
232
+ ```
233
+
234
+ Note that all blocks are evaluated in the context of the configuration object.
235
+ If you're calling this inside another class or method, you'll need to save
236
+ things you need to reference into local variables before calling `configure`.
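
For instance (a sketch; the `KafkaSetup` class is hypothetical):

```ruby
# Sketch: instance state must be captured in a local variable, because the
# configure block is evaluated against the configuration object, not self.
class KafkaSetup
  def initialize(broker_list)
    @broker_list = broker_list
  end

  def apply!
    brokers = @broker_list # capture before entering the block
    Deimos.configure do
      kafka.seed_brokers brokers
    end
  end
end
```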