racecar 2.2.0 → 2.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: e2c32338556d27bfcbae35df0758163a457d2c6e3f520141206c5a9c0124cc68
-   data.tar.gz: 43d9c42d0d483c269b15d417ef59e2985da3ca3945d34d6772f8dd262ccbfaf6
+   metadata.gz: 9bb07a3f984de71b18b9401ec7908158aba831c54a4972e9de7af59d2d7ae3ea
+   data.tar.gz: 6e00e2f40495624944963868001e0cf20885c9f33e49ad8d147d0037562e96fa
  SHA512:
-   metadata.gz: 0467ac1cdefb6cad9870dd73b92f4a5a943b9f685ff3fc876b3f183d109ae3d29d7c2c7dffea8f31bca7c7b18565e5aba04d4865c94f2448a7228be175855a5b
-   data.tar.gz: e4ab43eb180995af916d447b006438b4a48cb808b29aabec52b455e246541a083192d1b560a957fa6f3ab7d5412dd12ab74aac49acc0b606c3df87cec90b93b6
+   metadata.gz: 0f5d1b6b84dbd96343a09387709676e0ac1250f1ac79f82225cb2060d8900f91f724f1bf0c722d6c440ce6c65ee175da930fc0bd542f6138eeeaab7d80d9662a
+   data.tar.gz: ca635a04ca6ea5019e625563417ac5c3097d2999d145a26089a9f86eb00312cf242110dcf44958180bdafbfebac5a319fb70b559d2cebe364c906caa93db9ced
data/.github/workflows/ci.yml CHANGED
@@ -1,9 +1,10 @@
  name: CI

  on:
+   pull_request:
+     branches: ["master"]
    push:
-     branches:
-       - '**'
+     branches: ["master"]

  jobs:
    unit-specs:
@@ -11,12 +12,12 @@ jobs:

      strategy:
        matrix:
-         ruby-version: ["2.5", "2.6"]
+         ruby-version: ["2.4", "2.5", "2.6", "3.0"]

      steps:
        - uses: zendesk/checkout@v2
        - name: Set up Ruby
-         uses: zendesk/setup-ruby@v1.58.0
+         uses: zendesk/setup-ruby@v1.64.1
          with:
            ruby-version: ${{ matrix.ruby-version }}
            bundler-cache: true
@@ -25,37 +26,16 @@ jobs:

    integration-specs:
      runs-on: ubuntu-latest
-
-     services:
-       zookeeper:
-         image: confluentinc/cp-zookeeper
-         ports:
-           - 2181:2181
-         env:
-           ZOOKEEPER_CLIENT_PORT: 2181
-
-       kafka:
-         image: confluentinc/cp-kafka
-         ports:
-           - 9092:9092
-           - 29092:29092
-         options: --health-cmd "kafka-topics --list --bootstrap-server=localhost:9092" --health-interval 10s --health-timeout 5s --health-retries 5
-         env:
-           KAFKA_BROKER_ID: 1
-           KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
-           KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
-           KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-           KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
-           KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
-
      steps:
        - uses: zendesk/checkout@v2
        - name: Set up Ruby
-         uses: zendesk/setup-ruby@v1.58.0
+         uses: zendesk/setup-ruby@v1.64.1
          with:
            ruby-version: 2.7
            bundler-cache: true
+       - name: Bring up docker-compose stack
+         run: docker-compose up -d
        - name: Build and test with RSpec
          env:
            RACECAR_BROKERS: localhost:9092
-         run: bundle exec rspec --format documentation --require spec_helper --color spec/integration/*_spec.rb
+         run: timeout --kill-after 180 150 bundle exec rspec --format documentation --require spec_helper --color spec/integration/*_spec.rb
data/CHANGELOG.md CHANGED
@@ -1,6 +1,24 @@
  # Changelog

- ## Unreleased
+ ## v2.4.0
+
+ * Update librdkafka version from 1.4.0 to 1.5.0 by upgrading from rdkafka 0.8.0 to 0.10.0. ([#263](https://github.com/zendesk/racecar/pull/263))
+ * Restore support for Ruby 2.4 (#258)
+
+ ## racecar v2.3.1
+
+ * Handle `ERR_NOT_COORDINATOR` (#209)
+
+ ## racecar v2.3.0
+
+ * Add native support for Heroku (#248)
+ * [Racecar::Consumer] When messages fail to deliver, an extended error with hints is now raised: instead of `Rdkafka::RdkafkaError`, you'll get a `Racecar::MessageDeliveryError`. ([#219](https://github.com/zendesk/racecar/pull/219)) If you have set a `Racecar.config.error_handler`, it might need to be updated.
+ * [Racecar::Consumer] When message delivery times out, Racecar will reset the producer in an attempt to fix some of the potential causes for this error. ([#219](https://github.com/zendesk/racecar/pull/219))
+ * Validate the `process` and `process_batch` method signature on consumer classes when initializing (#236)
+ * Add Ruby 3.0 compatibility (#237)
+ * Introduce a parallel runner, which forks a number of independent consumers, allowing partitions to be processed in parallel. ([#222](https://github.com/zendesk/racecar/pull/222))
+ * [Racecar::Runner] Ensure the producer is closed, whether the runner stops cleanly or errors. ([#222](https://github.com/zendesk/racecar/pull/222))
+ * Configure `statistics_interval` directly in the config. Disable statistics when no callback is defined. ([#232](https://github.com/zendesk/racecar/pull/232))

  ## racecar v2.2.0

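To illustrate the v2.3.0 delivery-error change above: a custom error handler that matched `Rdkafka::RdkafkaError` may need updating to match the new class. A minimal sketch, assuming the handler is a callable assigned to `Racecar.config.error_handler` that receives the exception and an info hash (the exact `info` keys vary between `process` and `process_batch`; `ErrorTracker` is a hypothetical stand-in for your bug tracker):

```ruby
# Sketch only: assumes error_handler takes (exception, info).
Racecar.config.error_handler = proc do |exception, info|
  case exception
  when Racecar::MessageDeliveryError
    # Raised since v2.3.0 when a produced message cannot be delivered;
    # on delivery timeouts Racecar also resets the producer itself.
    ErrorTracker.notify(exception, metadata: info)
  else
    ErrorTracker.notify(exception)
  end
end
```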
data/Dockerfile ADDED
@@ -0,0 +1,9 @@
+ FROM circleci/ruby:2.7.2
+
+ RUN sudo apt-get update
+ RUN sudo apt-get install docker
+
+ WORKDIR /app
+ COPY . .
+
+ RUN bundle install
data/Gemfile CHANGED
@@ -4,3 +4,7 @@ source 'https://rubygems.org'

  # Specify your gem's dependencies in racecar.gemspec
  gemspec
+
+ # We actually support activesupport 6.0 (see gemspec); this extra restriction is added just so the
+ # test suite can also run on Ruby 2.4, which activesupport 6.0 no longer supports
+ gem 'activesupport', '< 6.0'
data/Gemfile.lock CHANGED
@@ -1,35 +1,34 @@
  PATH
    remote: .
    specs:
-     racecar (2.1.1)
+     racecar (2.3.1)
        king_konf (~> 1.0.0)
-       rdkafka (~> 0.8.0)
+       rdkafka (~> 0.10.0)

  GEM
    remote: https://rubygems.org/
    specs:
-     activesupport (6.0.3.4)
+     activesupport (5.2.6)
        concurrent-ruby (~> 1.0, >= 1.0.2)
        i18n (>= 0.7, < 2)
        minitest (~> 5.1)
        tzinfo (~> 1.1)
-       zeitwerk (~> 2.2, >= 2.2.2)
      coderay (1.1.3)
-     concurrent-ruby (1.1.7)
+     concurrent-ruby (1.1.9)
      diff-lcs (1.4.4)
      dogstatsd-ruby (4.8.2)
-     ffi (1.13.1)
-     i18n (1.8.5)
+     ffi (1.15.4)
+     i18n (1.8.10)
        concurrent-ruby (~> 1.0)
      king_konf (1.0.0)
      method_source (1.0.0)
-     mini_portile2 (2.5.0)
-     minitest (5.14.2)
+     mini_portile2 (2.7.0)
+     minitest (5.14.4)
      pry (0.13.1)
        coderay (~> 1.1)
        method_source (~> 1.0)
      rake (13.0.1)
-     rdkafka (0.8.1)
+     rdkafka (0.10.0)
        ffi (~> 1.9)
        mini_portile2 (~> 2.1)
        rake (>= 12.3)
@@ -37,26 +36,25 @@ GEM
        rspec-core (~> 3.10.0)
        rspec-expectations (~> 3.10.0)
        rspec-mocks (~> 3.10.0)
-     rspec-core (3.10.0)
+     rspec-core (3.10.1)
        rspec-support (~> 3.10.0)
-     rspec-expectations (3.10.0)
+     rspec-expectations (3.10.1)
        diff-lcs (>= 1.2.0, < 2.0)
        rspec-support (~> 3.10.0)
-     rspec-mocks (3.10.0)
+     rspec-mocks (3.10.2)
        diff-lcs (>= 1.2.0, < 2.0)
        rspec-support (~> 3.10.0)
-     rspec-support (3.10.0)
+     rspec-support (3.10.2)
      thread_safe (0.3.6)
      timecop (0.9.2)
-     tzinfo (1.2.8)
+     tzinfo (1.2.9)
        thread_safe (~> 0.1)
-     zeitwerk (2.4.2)

  PLATFORMS
    ruby

  DEPENDENCIES
-   activesupport (>= 4.0, < 6.1)
+   activesupport (< 6.0)
    bundler (>= 1.13, < 3)
    dogstatsd-ruby (>= 4.0.0, < 5.0.0)
    pry
@@ -66,4 +64,4 @@ DEPENDENCIES
    timecop

  BUNDLED WITH
-    2.1.4
+    2.2.15
data/README.md CHANGED
@@ -10,22 +10,21 @@ The framework is based on [rdkafka-ruby](https://github.com/appsignal/rdkafka-ru
  1. [Installation](#installation)
  2. [Usage](#usage)
-   1. [Creating consumers](#creating-consumers)
-   2. [Running consumers](#running-consumers)
-   3. [Producing messages](#producing-messages)
-   4. [Configuration](#configuration)
-   5. [Testing consumers](#testing-consumers)
-   6. [Deploying consumers](#deploying-consumers)
-   7. [Handling errors](#handling-errors)
-   8. [Logging](#logging)
-   9. [Operations](#operations)
-   10. [Upgrading from v1 to v2](#upgrading-from-v1-to-v2)
+    1. [Creating consumers](#creating-consumers)
+    2. [Running consumers](#running-consumers)
+    3. [Producing messages](#producing-messages)
+    4. [Configuration](#configuration)
+    5. [Testing consumers](#testing-consumers)
+    6. [Deploying consumers](#deploying-consumers)
+    7. [Handling errors](#handling-errors)
+    8. [Logging](#logging)
+    9. [Operations](#operations)
+    10. [Upgrading from v1 to v2](#upgrading-from-v1-to-v2)
  3. [Development](#development)
  4. [Contributing](#contributing)
  5. [Support and Discussion](#support-and-discussion)
  6. [Copyright and license](#copyright-and-license)

-
  ## Installation

  Add this line to your application's Gemfile:
@@ -77,12 +76,38 @@ In order to create your own consumer, run the Rails generator `racecar:consumer`

      $ bundle exec rails generate racecar:consumer TapDance

- This will create a file at `app/consumers/tap_dance_consumer.rb` which you can modify to your liking. Add one or more calls to `subscribes_to` in order to have the consumer subscribe to Kafka topics.
+ This will create a file at `app/consumers/tap_dance_consumer.rb` which you can modify to your liking. Add one or more calls to `subscribes_to` in order to have the consumer subscribe to Kafka topics.

  Now run your consumer with `bundle exec racecar TapDanceConsumer`.

  Note: if you're not using Rails, you'll have to add the file yourself. No-one will judge you for copy-pasting it.

+ #### Running consumers in parallel (experimental)
+
+ Warning: limited battle-testing in production environments; use at your own risk!
+
+ If you want to process different partitions in parallel, and don't want to deploy a number of instances matching the total partitions of the topic, you can specify the number of workers to spin up. That number of processes will be forked, and each will register its own consumer in the group. Some things to note:
+
+ - This makes no difference on a single-partition topic; only one consumer would ever be assigned a partition. A couple of example configurations to process all partitions in parallel (assuming a 15-partition topic):
+   - Parallel workers set to 3, 5 separate instances/replicas running in your container orchestrator
+   - Parallel workers set to 5, 3 separate instances/replicas running in your container orchestrator
+ - Since we're forking new processes, the memory demands are a little higher
+   - From some initial testing, running 5 parallel workers requires no more than double the memory of running a Racecar consumer without parallelism.
+
+ The number of parallel workers is configured per consumer class; you may only want to take advantage of this for busier consumers:
+
+ ```ruby
+ class ParallelProcessingConsumer < Racecar::Consumer
+   subscribes_to "some-topic"
+
+   self.parallel_workers = 5
+
+   def process(message)
+     ...
+   end
+ end
+ ```
+
  #### Initializing consumers

  You can optionally add an `initialize` method if you need to do any set-up work before processing messages, e.g.
@@ -158,9 +183,9 @@ message.headers #=> { "Header-A" => 42, ... }

  In order to avoid your consumer being kicked out of its group during long-running message processing operations, you'll need to let Kafka regularly know that the consumer is still healthy. There are two mechanisms in place to ensure that:

- *Heartbeats:* They are automatically sent in the background and ensure the broker can still talk to the consumer. This will detect network splits, ungraceful shutdowns, etc.
+ _Heartbeats:_ They are automatically sent in the background and ensure the broker can still talk to the consumer. This will detect network splits, ungraceful shutdowns, etc.

- *Message Fetch Interval:* Kafka expects the consumer to query for new messages within this time limit. This will detect situations with slow IO or the consumer being stuck in an infinite loop without making actual progress. This limit applies to a whole batch if you do batch processing. Use `max_poll_interval` to increase the default 5-minute timeout, or reduce batching with `fetch_messages`.
+ _Message Fetch Interval:_ Kafka expects the consumer to query for new messages within this time limit. This will detect situations with slow IO or the consumer being stuck in an infinite loop without making actual progress. This limit applies to a whole batch if you do batch processing. Use `max_poll_interval` to increase the default 5-minute timeout, or reduce batching with `fetch_messages`.

  #### Tearing down resources when stopping

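As an illustration of the knobs mentioned above: a consumer doing slow batch work might raise the poll interval and fetch smaller batches. A minimal sketch, assuming the block-style `Racecar.configure` accessor over the global config (values in seconds; the option names come from the configuration reference below):

```ruby
# Sketch only, not from this diff.
Racecar.configure do |config|
  # Allow up to 10 minutes between fetches before the group evicts the consumer.
  config.max_poll_interval = 600

  # Fetch fewer messages per batch so each batch finishes well inside the limit.
  config.fetch_messages = 100
end
```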
@@ -239,33 +264,35 @@ end

  #### Basic configuration

- * `brokers` – A list of Kafka brokers in the cluster that you're consuming from. Defaults to `localhost` on port 9092, the default Kafka port.
- * `client_id` – A string used to identify the client in logs and metrics.
- * `group_id` – The group id to use for a given group of consumers. Note that this _must_ be different for each consumer class. If left blank a group id is generated based on the consumer class name such that (for example) a consumer with the class name `BaconConsumer` would default to a group id of `bacon-consumer`.
- * `group_id_prefix` – A prefix used when generating consumer group names. For instance, if you set the prefix to be `kevin.` and your consumer class is named `BaconConsumer`, the resulting consumer group will be named `kevin.bacon-consumer`.
+ - `brokers` – A list of Kafka brokers in the cluster that you're consuming from. Defaults to `localhost` on port 9092, the default Kafka port.
+ - `client_id` – A string used to identify the client in logs and metrics.
+ - `group_id` – The group id to use for a given group of consumers. Note that this _must_ be different for each consumer class. If left blank a group id is generated based on the consumer class name such that (for example) a consumer with the class name `BaconConsumer` would default to a group id of `bacon-consumer`.
+ - `group_id_prefix` – A prefix used when generating consumer group names. For instance, if you set the prefix to be `kevin.` and your consumer class is named `BaconConsumer`, the resulting consumer group will be named `kevin.bacon-consumer`.

  #### Logging

- * `logfile` – A filename that log messages should be written to. Default is `nil`, which means logs will be written to standard output.
- * `log_level` – The log level for the Racecar logs, one of `debug`, `info`, `warn`, or `error`. Default is `info`.
+ - `logfile` – A filename that log messages should be written to. Default is `nil`, which means logs will be written to standard output.
+ - `log_level` – The log level for the Racecar logs, one of `debug`, `info`, `warn`, or `error`. Default is `info`.

  #### Consumer checkpointing

  The consumers will checkpoint their positions from time to time in order to be able to recover from failures. This is called _committing offsets_, since it's done by tracking the offset reached in each partition being processed, and committing those offset numbers to the Kafka offset storage API. If you can tolerate more double-processing after a failure, you can increase the interval between commits to get better performance. You can also do the opposite if you prefer less chance of double-processing.

- * `offset_commit_interval` – How often to save the consumer's position in Kafka. Default is every 10 seconds.
+ - `offset_commit_interval` – How often to save the consumer's position in Kafka. Default is every 10 seconds.

  #### Timeouts & intervals

  All timeouts are defined in number of seconds.

- * `session_timeout` – The idle timeout after which a consumer is kicked out of the group. Consumers must send heartbeats with at least this frequency.
- * `heartbeat_interval` – How often to send a heartbeat message to Kafka.
- * `max_poll_interval` – The maximum time between two message fetches before the consumer is kicked out of the group. Put differently, your (batch) processing must finish earlier than this.
- * `pause_timeout` – How long to pause a partition for if the consumer raises an exception while processing a message. Default is to pause for 10 seconds. Set this to `0` in order to disable automatic pausing of partitions or to `-1` to pause indefinitely.
- * `pause_with_exponential_backoff` – Set to `true` if you want to double the `pause_timeout` on each consecutive failure of a particular partition.
- * `socket_timeout` – How long to wait when trying to communicate with a Kafka broker. Default is 30 seconds.
- * `max_wait_time` – How long to allow the Kafka brokers to wait before returning messages. A higher number means larger batches, at the cost of higher latency. Default is 1 second.
+ - `session_timeout` – The idle timeout after which a consumer is kicked out of the group. Consumers must send heartbeats with at least this frequency.
+ - `heartbeat_interval` – How often to send a heartbeat message to Kafka.
+ - `max_poll_interval` – The maximum time between two message fetches before the consumer is kicked out of the group. Put differently, your (batch) processing must finish earlier than this.
+ - `pause_timeout` – How long to pause a partition for if the consumer raises an exception while processing a message. Default is to pause for 10 seconds. Set this to `0` in order to disable automatic pausing of partitions or to `-1` to pause indefinitely.
+ - `pause_with_exponential_backoff` – Set to `true` if you want to double the `pause_timeout` on each consecutive failure of a particular partition.
+ - `socket_timeout` – How long to wait when trying to communicate with a Kafka broker. Default is 30 seconds.
+ - `max_wait_time` – How long to allow the Kafka brokers to wait before returning messages. A higher number means larger batches, at the cost of higher latency. Default is 1 second.
+ - `message_timeout` – How long to try to deliver a produced message before finally giving up. Default is 5 minutes. Transient errors are automatically retried. If a message delivery fails, the current read message batch is retried.
+ - `statistics_interval` – How frequently librdkafka should publish statistics about its consumers and producers; you must also add a `statistics_callback` method to your processor, otherwise the stats are disabled. The default is 1 second; however, this can be quite memory-hungry, so you may want to tune it and monitor usage.
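The `statistics_interval` option above only takes effect together with a `statistics_callback` method on your consumer class; without the callback, statistics stay disabled. A minimal sketch — the callback's exact signature and the `"msg_cnt"` field are assumptions based on librdkafka's statistics JSON:

```ruby
class StatsAwareConsumer < Racecar::Consumer
  subscribes_to "some-topic"

  # Assumed: invoked roughly every statistics_interval seconds with
  # librdkafka's parsed statistics hash.
  def statistics_callback(stats)
    puts "messages in local queue: #{stats["msg_cnt"]}"
  end

  def process(message)
    # ...
  end
end
```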

  #### Memory & network usage

@@ -273,53 +300,55 @@ Kafka is _really_ good at throwing data at consumers, so you may want to tune th

  Racecar uses [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) under the hood, which fetches messages from the Kafka brokers in a background thread. This thread pushes fetch responses, possibly containing messages from many partitions, into a queue that is read by the processing thread (AKA your code). The main way to control the fetcher thread is to control the size of those responses and the size of the queue.

- * `max_bytes` — Maximum amount of data the broker shall return for a Fetch request.
- * `min_message_queue_size` — The minimum number of messages in the local consumer queue.
+ - `max_bytes` — Maximum amount of data the broker shall return for a Fetch request.
+ - `min_message_queue_size` — The minimum number of messages in the local consumer queue.

  The memory usage limit is roughly estimated as `max_bytes * min_message_queue_size`, plus whatever your application uses.

  #### SSL encryption, authentication & authorization

- * `security_protocol` – Protocol used to communicate with brokers (`:ssl`)
- * `ssl_ca_location` – File or directory path to CA certificate(s) for verifying the broker's key
- * `ssl_crl_location` – Path to CRL for verifying broker's certificate validity
- * `ssl_keystore_location` – Path to client's keystore (PKCS#12) used for authentication
- * `ssl_keystore_password` – Client's keystore (PKCS#12) password
- * `ssl_certificate_location` – Path to the certificate
- * `ssl_key_location` – Path to client's certificate used for authentication
- * `ssl_key_password` – Client's certificate password
+ - `security_protocol` – Protocol used to communicate with brokers (`:ssl`)
+ - `ssl_ca_location` – File or directory path to CA certificate(s) for verifying the broker's key
+ - `ssl_crl_location` – Path to CRL for verifying broker's certificate validity
+ - `ssl_keystore_location` – Path to client's keystore (PKCS#12) used for authentication
+ - `ssl_keystore_password` – Client's keystore (PKCS#12) password
+ - `ssl_certificate_location` – Path to the certificate
+ - `ssl_key_location` – Path to client's certificate used for authentication
+ - `ssl_key_password` – Client's certificate password

  #### SASL encryption, authentication & authorization

  Racecar has support for using SASL to authenticate clients using either the GSSAPI or PLAIN mechanism, either via plaintext or SSL connection.

- * `security_protocol` – Protocol used to communicate with brokers (`:sasl_plaintext` `:sasl_ssl`)
- * `sasl_mechanism` – SASL mechanism to use for authentication (`GSSAPI` `PLAIN` `SCRAM-SHA-256` `SCRAM-SHA-512`)
+ - `security_protocol` – Protocol used to communicate with brokers (`:sasl_plaintext` `:sasl_ssl`)
+ - `sasl_mechanism` – SASL mechanism to use for authentication (`GSSAPI` `PLAIN` `SCRAM-SHA-256` `SCRAM-SHA-512`)

- * `sasl_kerberos_principal` – This client's Kerberos principal name
- * `sasl_kerberos_kinit_cmd` – Full kerberos kinit command string, `%{config.prop.name}` is replaced by corresponding config object value, `%{broker.name}` returns the broker's hostname
- * `sasl_kerberos_keytab` – Path to Kerberos keytab file. Uses system default if not set
- * `sasl_kerberos_min_time_before_relogin` – Minimum time in milliseconds between key refresh attempts
- * `sasl_username` – SASL username for use with the PLAIN and SASL-SCRAM-.. mechanism
- * `sasl_password` – SASL password for use with the PLAIN and SASL-SCRAM-.. mechanism
+ - `sasl_kerberos_principal` – This client's Kerberos principal name
+ - `sasl_kerberos_kinit_cmd` – Full kerberos kinit command string, `%{config.prop.name}` is replaced by corresponding config object value, `%{broker.name}` returns the broker's hostname
+ - `sasl_kerberos_keytab` – Path to Kerberos keytab file. Uses system default if not set
+ - `sasl_kerberos_min_time_before_relogin` – Minimum time in milliseconds between key refresh attempts
+ - `sasl_username` – SASL username for use with the PLAIN and SASL-SCRAM-.. mechanism
+ - `sasl_password` – SASL password for use with the PLAIN and SASL-SCRAM-.. mechanism

  #### Producing messages

  These settings are related to consumers that _produce messages to Kafka_.

- * `producer_compression_codec` – If defined, Racecar will compress messages before writing them to Kafka. The codec needs to be one of `gzip`, `lz4`, or `snappy`, either as a Symbol or a String.
+ - `producer_compression_codec` – If defined, Racecar will compress messages before writing them to Kafka. The codec needs to be one of `gzip`, `lz4`, or `snappy`, either as a Symbol or a String.

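For instance, enabling compression for produced messages could look like this sketch (again assuming the block-style `Racecar.configure` accessor):

```ruby
Racecar.configure do |config|
  # One of :gzip, :lz4, or :snappy (Symbol or String), per the list above.
  config.producer_compression_codec = :snappy
end
```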

  #### Datadog monitoring

  Racecar supports [Datadog](https://www.datadoghq.com/) monitoring integration. If you're running a normal Datadog agent on your host, you just need to set `datadog_enabled` to `true`, as the rest of the settings come with sane defaults.

- * `datadog_enabled` – Whether Datadog monitoring is enabled (defaults to `false`).
- * `datadog_host` – The host running the Datadog agent.
- * `datadog_port` – The port of the Datadog agent.
- * `datadog_namespace` – The namespace to use for Datadog metrics.
- * `datadog_tags` – Tags that should always be set on Datadog metrics.
+ - `datadog_enabled` – Whether Datadog monitoring is enabled (defaults to `false`).
+ - `datadog_host` – The host running the Datadog agent.
+ - `datadog_port` – The port of the Datadog agent.
+ - `datadog_namespace` – The namespace to use for Datadog metrics.
+ - `datadog_tags` – Tags that should always be set on Datadog metrics.

- #### Consumers Without Rails ####
+ Furthermore, there's a [standard Datadog dashboard configuration file](https://raw.githubusercontent.com/zendesk/racecar/master/extra/datadog-dashboard.json) that you can import to get started with a Racecar dashboard for all of your consumers.
+
+ #### Consumers Without Rails

  By default, if Rails is detected, it will be automatically started when the consumer is started. There are cases where you might not want or need Rails. You can pass the `--without-rails` option when starting the consumer and Rails won't be started.

@@ -357,7 +386,6 @@ describe CreateContactsConsumer do
  end
  ```

-
  ### Deploying consumers

  If you're already deploying your Rails application using e.g. [Capistrano](http://capistranorb.com/), all you need to do to run your Racecar consumers in production is to have some _process supervisor_ start the processes and manage them for you.
@@ -369,7 +397,7 @@ racecar-process-payments: bundle exec racecar ProcessPaymentsConsumer
  racecar-resize-images: bundle exec racecar ResizeImagesConsumer
  ```

- If you've ever used Heroku you'll recognize the format – indeed, deploying to Heroku should just work if you add Racecar invocations to your Procfile.
+ If you've ever used Heroku you'll recognize the format – indeed, deploying to Heroku should just work if you add Racecar invocations to your Procfile and [enable the Heroku integration](#deploying-to-heroku).

  With Foreman, you can easily run these processes locally by executing `foreman run`; in production you'll want to _export_ to another process management format such as Upstart or Runit. [capistrano-foreman](https://github.com/hyperoslo/capistrano-foreman) allows you to do this with Capistrano.

@@ -397,20 +425,37 @@ spec:
          app: my-racecar
      spec:
        containers:
-       - name: my-racecar
-         image: my-racecar-image
-         command: ["bundle", "exec", "racecar", "MyConsumer"]
-         env: # <-- you can configure the consumer using environment variables!
-         - name: RACECAR_BROKERS
-           value: kafka1,kafka2,kafka3
-         - name: RACECAR_OFFSET_COMMIT_INTERVAL
-           value: 5
+         - name: my-racecar
+           image: my-racecar-image
+           command: ["bundle", "exec", "racecar", "MyConsumer"]
+           env: # <-- you can configure the consumer using environment variables!
+             - name: RACECAR_BROKERS
+               value: kafka1,kafka2,kafka3
+             - name: RACECAR_OFFSET_COMMIT_INTERVAL
+               value: 5
  ```

  The important part is the `strategy.type` value, which tells Kubernetes how to upgrade from one version of your Deployment to another. Many services use so-called _rolling updates_, where some but not all containers are replaced with the new version. This is done so that, if the new version doesn't work, the old version is still there to serve most of the requests. For Kafka consumers, this doesn't work well. The reason is that every time a consumer joins or leaves a group, every other consumer in the group needs to stop and synchronize the list of partitions assigned to each group member. So if the group is updated in a rolling fashion, this synchronization would occur over and over again, causing undesirable double-processing of messages as consumers would start only to be synchronized shortly after.

  Instead, the `Recreate` update strategy should be used. It completely tears down the existing containers before starting all of the new containers simultaneously, allowing for a single synchronization stage and a much faster, more stable deployment update.

+ #### Deploying to Heroku
+
+ If you run your applications in Heroku and/or use the Heroku Kafka add-on, your application will be provided with 4 ENV variables that allow connecting to the cluster: `KAFKA_URL`, `KAFKA_TRUSTED_CERT`, `KAFKA_CLIENT_CERT`, and `KAFKA_CLIENT_CERT_KEY`.
+
+ Racecar has a built-in helper for configuring your application based on these variables – just add `require "racecar/heroku"` and everything should just work.
+
+ Please note that aliasing the Heroku Kafka add-on will break this integration. If you have a need to do that, please ask on [the discussion board](https://github.com/zendesk/racecar/discussions).
+
+ ```ruby
+ # This takes care of setting up your consumer based on the ENV
+ # variables provided by Heroku.
+ require "racecar/heroku"
+
+ class SomeConsumer < Racecar::Consumer
+   # ...
+ end
+ ```

  #### Running consumers in the background

@@ -428,7 +473,6 @@ Since the process is daemonized, you need to know the process id (PID) in order
  Again, the recommended approach is to manage the processes using process managers. Only do this if you have to.


-
  ### Handling errors

  When processing messages from a Kafka topic, your code may encounter an error and raise an exception. The cause is typically one of two things:
@@ -466,45 +510,49 @@ end

  It is highly recommended that you set up an error handler. Please note that the `info` object contains different keys and values depending on whether you are using `process` or `process_batch`. See the `instrumentation_payload` object in the `process` and `process_batch` methods in the `Runner` class for the complete list.

-
  ### Logging

  By default, Racecar will log to `STDOUT`. If you're using Rails, your application code will use whatever logger you've configured there.

  In order to make Racecar log its own operations to a log file, set the `logfile` configuration variable or pass `--log filename.log` to the `racecar` command.

-
  ### Operations

  In order to gracefully shut down a Racecar consumer process, send it the `SIGTERM` signal. Most process supervisors such as Runit and Kubernetes send this signal when shutting down a process, so using those systems will make things easier.

  In order to introspect the configuration of a consumer process, send it the `SIGUSR1` signal. This will make Racecar print its configuration to the standard error file descriptor associated with the consumer process, so you'll need to know where that is written to.

-
  ### Upgrading from v1 to v2

  In order to safely upgrade from Racecar v1 to v2, you need to completely shut down your consumer group before starting it up again with the v2 Racecar dependency. In general, you should avoid rolling deploys for consumer groups, so it is likely the case that this will just work for you, but it's a good idea to check first.

-
  ## Development

  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rspec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

  The integration tests run against a Kafka instance that is not automatically started from within `rspec`. You can set one up using the provided `docker-compose.yml` by running `docker-compose up`.

+ ### Running RSpec within Docker
+
+ There can be behavioural inconsistencies between running the specs on your machine and in the CI pipeline. Because of this, a Dockerfile is now included in the project, based on the CircleCI Ruby 2.7.2 image. This could easily be extended with more Dockerfiles to cover different Ruby versions if desired. In order to run the specs via Docker:
+
+ - Uncomment the `tests` service from the docker-compose.yml
+ - Bring up the stack with `docker-compose up -d`
+ - Execute the entire suite with `docker-compose run --rm tests rspec`
+ - Execute a single spec or directory with `docker-compose run --rm tests rspec spec/integration/consumer_spec.rb`
+
+ Please note: your code directory is mounted as a volume, so you can make code changes without needing to rebuild.

  ## Contributing

  Bug reports and pull requests are welcome on [GitHub](https://github.com/zendesk/racecar). Feel free to [join our Slack team](https://ruby-kafka-slack.herokuapp.com/) and ask how best to contribute!

-
  ## Support and Discussion

  If you've discovered a bug, please file a [Github issue](https://github.com/zendesk/racecar/issues/new), and make sure to include all the relevant information, including the version of Racecar, rdkafka-ruby, and Kafka that you're using.

  If you have other questions, or would like to discuss best practices or how to contribute to the project, [join our Slack team](https://ruby-kafka-slack.herokuapp.com/)!

-
  ## Copyright and license

  Copyright 2017 Daniel Schierbeck & Zendesk
data/docker-compose.yml CHANGED
@@ -1,19 +1,19 @@
- version: '2'
+ version: '2.1'
+
  services:
    zookeeper:
      image: confluentinc/cp-zookeeper:5.5.1
-     hostname: zookeeper
-     container_name: zookeeper
      ports:
        - "2181:2181"
      environment:
        ZOOKEEPER_CLIENT_PORT: 2181
        ZOOKEEPER_TICK_TIME: 2000
+       KAFKA_OPTS: "-Dzookeeper.4lw.commands.whitelist=*"
+     healthcheck:
+       test: echo ruok | nc 127.0.0.1 2181 | grep imok

    broker:
      image: confluentinc/cp-kafka:5.5.1
-     hostname: broker
-     container_name: broker
      depends_on:
        - zookeeper
      ports:
@@ -30,3 +30,36 @@ services:
        KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
        KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
        KAFKA_JMX_PORT: 9101
+       KAFKA_DELETE_TOPIC_ENABLE: 'true'
+     healthcheck:
+       test: nc -z 127.0.0.1 9092
+
+   wait-for-healthy-services:
+     image: alpine
+     depends_on:
+       broker:
+         condition: service_healthy
+       zookeeper:
+         condition: service_healthy
+
+
+   # If you want to run the tests locally with Docker, comment in the tests service.
+   # The behaviour, especially of the integration tests, can differ somewhat compared
+   # to running it on your machine.
+
+   # tests:
+   #   build:
+   #     context: .
+   #   depends_on:
+   #     wait-for-healthy-services:
+   #       condition: service_started
+   #   environment:
+   #     RACECAR_BROKERS: broker:29092
+   #     DOCKER_SUDO: 'true'
+   #   # When bringing up the stack, we just let the container exit. For running the
+   #   # specs, we'll use commands like `docker-compose run tests rspec`
+   #   command: ["echo", "ready"]
+   #   volumes:
+   #     # The line below allows us to run docker commands from the container itself
+   #     - "/var/run/docker.sock:/var/run/docker.sock"
+   #     - .:/app