ruby-kafka 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +81 -0
- data/Procfile +2 -0
- data/README.md +151 -21
- data/ci/consumer.rb +18 -0
- data/ci/init.rb +17 -0
- data/ci/producer.rb +25 -0
- data/lib/kafka/client.rb +89 -10
- data/lib/kafka/cluster.rb +2 -5
- data/lib/kafka/compressor.rb +1 -1
- data/lib/kafka/consumer.rb +10 -2
- data/lib/kafka/datadog.rb +208 -0
- data/lib/kafka/fetch_operation.rb +1 -1
- data/lib/kafka/fetched_batch.rb +12 -0
- data/lib/kafka/instrumenter.rb +11 -0
- data/lib/kafka/pending_message.rb +9 -16
- data/lib/kafka/produce_operation.rb +1 -0
- data/lib/kafka/producer.rb +8 -7
- data/lib/kafka/socket_with_timeout.rb +2 -0
- data/lib/kafka/version.rb +1 -1
- data/performance/profile.rb +39 -0
- data/ruby-kafka.gemspec +3 -1
- metadata +39 -10
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 0681733f79e04de3118e211640474ed752c8b340
         | 
| 4 | 
            +
              data.tar.gz: d35d33238ec84ad331c7af167ac3464c29bc65ff
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7b040e4e49d312961a23f4b42238877e0c014734c4f99de3df41f0c2db011afe12f3dd91d7edaaa016131c28925233c27482fd1d55a7207eeb2e767f2996151c
         | 
| 7 | 
            +
              data.tar.gz: cb3b411ff48f4ae6868adbec7f122e6793d50ea93cb8acfd7c5228ab20ef023e43a50b136249939154649b75dc4b09b6ea0c611e6011ef9c79b2d01c17d63edb
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -4,6 +4,14 @@ Changes and additions to the library will be listed here. | |
| 4 4 |  | 
| 5 5 | 
             
            ## Unreleased
         | 
| 6 6 |  | 
| 7 | 
            +
            ## v0.3.9
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            - Improve the default durability setting. The producer setting `required_acks` now defaults to `:all` (#210).
         | 
| 10 | 
            +
            - Handle rebalances in the producer (#196). *Mpampis Kostas*
         | 
| 11 | 
            +
            - Add simplified producer and consumer APIs for simple use cases.
         | 
| 12 | 
            +
            - Add out-of-the-box Datadog reporting.
         | 
| 13 | 
            +
            - Improve producer performance.
         | 
| 14 | 
            +
             | 
| 7 15 | 
             
            ## v0.3.8
         | 
| 8 16 |  | 
| 9 17 | 
             
            - Keep separate connection pools for consumers and producers initialized from
         | 
    
        data/Gemfile.lock
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            PATH
         | 
| 2 | 
            +
              remote: .
         | 
| 3 | 
            +
              specs:
         | 
| 4 | 
            +
                ruby-kafka (0.3.8)
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            GEM
         | 
| 7 | 
            +
              remote: https://rubygems.org/
         | 
| 8 | 
            +
              specs:
         | 
| 9 | 
            +
                activesupport (4.2.5)
         | 
| 10 | 
            +
                  i18n (~> 0.7)
         | 
| 11 | 
            +
                  json (~> 1.7, >= 1.7.7)
         | 
| 12 | 
            +
                  minitest (~> 5.1)
         | 
| 13 | 
            +
                  thread_safe (~> 0.3, >= 0.3.4)
         | 
| 14 | 
            +
                  tzinfo (~> 1.1)
         | 
| 15 | 
            +
                benchmark-perf (0.1.0)
         | 
| 16 | 
            +
                builder (3.2.2)
         | 
| 17 | 
            +
                coderay (1.1.0)
         | 
| 18 | 
            +
                colored (1.2)
         | 
| 19 | 
            +
                diff-lcs (1.2.5)
         | 
| 20 | 
            +
                docker-api (1.25.0)
         | 
| 21 | 
            +
                  excon (>= 0.38.0)
         | 
| 22 | 
            +
                  json
         | 
| 23 | 
            +
                dogstatsd-ruby (1.6.0)
         | 
| 24 | 
            +
                dotenv (2.1.0)
         | 
| 25 | 
            +
                excon (0.45.4)
         | 
| 26 | 
            +
                i18n (0.7.0)
         | 
| 27 | 
            +
                json (1.8.3)
         | 
| 28 | 
            +
                method_source (0.8.2)
         | 
| 29 | 
            +
                minitest (5.8.3)
         | 
| 30 | 
            +
                pry (0.9.12.6)
         | 
| 31 | 
            +
                  coderay (~> 1.0)
         | 
| 32 | 
            +
                  method_source (~> 0.8)
         | 
| 33 | 
            +
                  slop (~> 3.4)
         | 
| 34 | 
            +
                rake (10.5.0)
         | 
| 35 | 
            +
                rspec (3.4.0)
         | 
| 36 | 
            +
                  rspec-core (~> 3.4.0)
         | 
| 37 | 
            +
                  rspec-expectations (~> 3.4.0)
         | 
| 38 | 
            +
                  rspec-mocks (~> 3.4.0)
         | 
| 39 | 
            +
                rspec-benchmark (0.1.0)
         | 
| 40 | 
            +
                  benchmark-perf (~> 0.1.0)
         | 
| 41 | 
            +
                  rspec (>= 3.0.0, < 4.0.0)
         | 
| 42 | 
            +
                rspec-core (3.4.1)
         | 
| 43 | 
            +
                  rspec-support (~> 3.4.0)
         | 
| 44 | 
            +
                rspec-expectations (3.4.0)
         | 
| 45 | 
            +
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 46 | 
            +
                  rspec-support (~> 3.4.0)
         | 
| 47 | 
            +
                rspec-mocks (3.4.1)
         | 
| 48 | 
            +
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 49 | 
            +
                  rspec-support (~> 3.4.0)
         | 
| 50 | 
            +
                rspec-support (3.4.1)
         | 
| 51 | 
            +
                rspec_junit_formatter (0.2.2)
         | 
| 52 | 
            +
                  builder (< 4)
         | 
| 53 | 
            +
                  rspec-core (>= 2, < 4, != 2.12.0)
         | 
| 54 | 
            +
                ruby-prof (0.15.9)
         | 
| 55 | 
            +
                slop (3.6.0)
         | 
| 56 | 
            +
                snappy (0.0.12)
         | 
| 57 | 
            +
                thread_safe (0.3.5)
         | 
| 58 | 
            +
                tzinfo (1.2.2)
         | 
| 59 | 
            +
                  thread_safe (~> 0.1)
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            PLATFORMS
         | 
| 62 | 
            +
              ruby
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            DEPENDENCIES
         | 
| 65 | 
            +
              activesupport
         | 
| 66 | 
            +
              bundler (>= 1.9.5)
         | 
| 67 | 
            +
              colored
         | 
| 68 | 
            +
              docker-api
         | 
| 69 | 
            +
              dogstatsd-ruby
         | 
| 70 | 
            +
              dotenv
         | 
| 71 | 
            +
              pry
         | 
| 72 | 
            +
              rake (~> 10.0)
         | 
| 73 | 
            +
              rspec
         | 
| 74 | 
            +
              rspec-benchmark
         | 
| 75 | 
            +
              rspec_junit_formatter (= 0.2.2)
         | 
| 76 | 
            +
              ruby-kafka!
         | 
| 77 | 
            +
              ruby-prof
         | 
| 78 | 
            +
              snappy
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            BUNDLED WITH
         | 
| 81 | 
            +
               1.10.6
         | 
    
        data/Procfile
    ADDED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -9,25 +9,31 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce | |
| 9 9 | 
             
            ## Table of Contents
         | 
| 10 10 |  | 
| 11 11 | 
             
            1. [Installation](#installation)
         | 
| 12 | 
            -
            2. [ | 
| 13 | 
            -
             | 
| 12 | 
            +
            2. [Compatibility](#compatibility)
         | 
| 13 | 
            +
            3. [Usage](#usage)
         | 
| 14 | 
            +
              1. [Setting up the Kafka Client](#setting-up-the-kafka-client)
         | 
| 15 | 
            +
              2. [Producing Messages to Kafka](#producing-messages-to-kafka)
         | 
| 16 | 
            +
                1. [Efficiently Producing Messages](#efficiently-producing-messages)
         | 
| 14 17 | 
             
                1. [Asynchronously Producing Messages](#asynchronously-producing-messages)
         | 
| 15 18 | 
             
                2. [Serialization](#serialization)
         | 
| 16 19 | 
             
                3. [Partitioning](#partitioning)
         | 
| 17 20 | 
             
                4. [Buffering and Error Handling](#buffering-and-error-handling)
         | 
| 18 | 
            -
                5. [Message  | 
| 19 | 
            -
                6. [ | 
| 20 | 
            -
                7. [ | 
| 21 | 
            -
             | 
| 21 | 
            +
                5. [Message Durability](#message-durability)
         | 
| 22 | 
            +
                6. [Message Delivery Guarantees](#message-delivery-guarantees)
         | 
| 23 | 
            +
                7. [Compression](#compression)
         | 
| 24 | 
            +
                8. [Producing Messages from a Rails Application](#producing-messages-from-a-rails-application)
         | 
| 25 | 
            +
              3. [Consuming Messages from Kafka](#consuming-messages-from-kafka)
         | 
| 22 26 | 
             
                1. [Consumer Checkpointing](#consumer-checkpointing)
         | 
| 23 | 
            -
                2. [ | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
               | 
| 27 | 
            -
               | 
| 28 | 
            -
               | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 27 | 
            +
                2. [Topic Subscriptions](#topic-subscriptions)
         | 
| 28 | 
            +
                3. [Consuming Messages in Batches](#consuming-messages-in-batches)
         | 
| 29 | 
            +
                4. [Balancing Throughput and Latency](#balancing-throughput-and-latency)
         | 
| 30 | 
            +
              4. [Thread Safety](#thread-safety)
         | 
| 31 | 
            +
              5. [Logging](#logging)
         | 
| 32 | 
            +
              6. [Instrumentation](#instrumentation)
         | 
| 33 | 
            +
              7. [Understanding Timeouts](#understanding-timeouts)
         | 
| 34 | 
            +
              8. [Encryption and Authentication using SSL](#encryption-and-authentication-using-ssl)
         | 
| 35 | 
            +
            4. [Development](#development)
         | 
| 36 | 
            +
            5. [Roadmap](#roadmap)
         | 
| 31 37 |  | 
| 32 38 | 
             
            ## Installation
         | 
| 33 39 |  | 
| @@ -45,21 +51,76 @@ Or install it yourself as: | |
| 45 51 |  | 
| 46 52 | 
             
                $ gem install ruby-kafka
         | 
| 47 53 |  | 
| 54 | 
            +
            ## Compatibility
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            <table>
         | 
| 57 | 
            +
              <tr>
         | 
| 58 | 
            +
                <th></th>
         | 
| 59 | 
            +
                <th>Kafka 0.8</th>
         | 
| 60 | 
            +
                <th>Kafka 0.9</th>
         | 
| 61 | 
            +
                <th>Kafka 0.10</th>
         | 
| 62 | 
            +
              </tr>
         | 
| 63 | 
            +
              <tr>
         | 
| 64 | 
            +
                <th>Producer API</th>
         | 
| 65 | 
            +
                <td>Full support</td>
         | 
| 66 | 
            +
                <td>Full support</td>
         | 
| 67 | 
            +
                <td>Limited support</td>
         | 
| 68 | 
            +
              </tr>
         | 
| 69 | 
            +
              <tr>
         | 
| 70 | 
            +
                <th>Consumer API</th>
         | 
| 71 | 
            +
                <td>Unsupported</td>
         | 
| 72 | 
            +
                <td>Full support</td>
         | 
| 73 | 
            +
                <td>Limited support</td>
         | 
| 74 | 
            +
              </tr>
         | 
| 75 | 
            +
            </table>
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            This library is targeting Kafka 0.9, although there is limited support for versions 0.8 and 0.10:
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            - **Kafka 0.8:** Full support for the Producer API, but no support for consumer groups. Simple message fetching works.
         | 
| 80 | 
            +
            - **Kafka 0.10:** Full support for both the Producer and Consumer APIs, but the addition of message timestamps is not supported. However, ruby-kafka should be completely compatible with Kafka 0.10 brokers.
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            This library requires Ruby 2.1 or higher.
         | 
| 83 | 
            +
             | 
| 48 84 | 
             
            ## Usage
         | 
| 49 85 |  | 
| 50 86 | 
             
            Please see the [documentation site](http://www.rubydoc.info/gems/ruby-kafka) for detailed documentation on the latest release. Note that the documentation on GitHub may not match the version of the library you're using – many changes are still being made to the API.
         | 
| 51 87 |  | 
| 52 | 
            -
            ###  | 
| 88 | 
            +
            ### Setting up the Kafka Client
         | 
| 53 89 |  | 
| 54 | 
            -
            A client must be initialized with at least one Kafka broker. Each client keeps a separate pool of broker connections. Don't use the same client from more than one thread.
         | 
| 90 | 
            +
            A client must be initialized with at least one Kafka broker, from which the entire Kafka cluster will be discovered. Each client keeps a separate pool of broker connections. Don't use the same client from more than one thread.
         | 
| 55 91 |  | 
| 56 92 | 
             
            ```ruby
         | 
| 57 93 | 
             
            require "kafka"
         | 
| 58 94 |  | 
| 59 | 
            -
            kafka = Kafka.new( | 
| 95 | 
            +
            kafka = Kafka.new(
         | 
| 96 | 
            +
              # At least one of these nodes must be available:
         | 
| 97 | 
            +
              seed_brokers: ["kafka1:9092", "kafka2:9092"],
         | 
| 98 | 
            +
              
         | 
| 99 | 
            +
              # Set an optional client id in order to identify the client to Kafka:
         | 
| 100 | 
            +
              client_id: "my-application",
         | 
| 101 | 
            +
            )
         | 
| 60 102 | 
             
            ```
         | 
| 61 103 |  | 
| 62 | 
            -
             | 
| 104 | 
            +
            ### Producing Messages to Kafka
         | 
| 105 | 
            +
             | 
| 106 | 
            +
            The simplest way to write a message to a Kafka topic is to call `#deliver_message`:
         | 
| 107 | 
            +
             | 
| 108 | 
            +
            ```ruby
         | 
| 109 | 
            +
            kafka = Kafka.new(...)
         | 
| 110 | 
            +
            kafka.deliver_message("Hello, World!", topic: "greetings")
         | 
| 111 | 
            +
            ```
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            This will write the message to a random partition in the `greetings` topic.
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            #### Efficiently Producing Messages
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            While `#deliver_message` works fine for infrequent writes, there are a number of downsides:
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            * Kafka is optimized for transmitting _batches_ of messages rather than individual messages, so there's a significant overhead and performance penalty in using the single-message API.
         | 
| 120 | 
            +
            * The message delivery can fail in a number of different ways, but this simplistic API does not provide automatic retries.
         | 
| 121 | 
            +
            * The message is not buffered, so if there is an error, it is lost.
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            The Producer API solves all these problems and more:
         | 
| 63 124 |  | 
| 64 125 | 
             
            ```ruby
         | 
| 65 126 | 
             
            producer = kafka.producer
         | 
| @@ -113,8 +174,9 @@ producer.produce("hello", topic: "greetings") | |
| 113 174 | 
             
            # `#deliver_messages` will return immediately.
         | 
| 114 175 | 
             
            producer.deliver_messages
         | 
| 115 176 |  | 
| 116 | 
            -
            # Make sure to call `#shutdown` on the producer in order to
         | 
| 117 | 
            -
            #  | 
| 177 | 
            +
            # Make sure to call `#shutdown` on the producer in order to avoid leaking
         | 
| 178 | 
            +
            # resources. `#shutdown` will wait for any pending messages to be delivered
         | 
| 179 | 
            +
            # before returning.
         | 
| 118 180 | 
             
            producer.shutdown
         | 
| 119 181 | 
             
            ```
         | 
| 120 182 |  | 
| @@ -140,6 +202,8 @@ producer.produce("hello", topic: "greetings") | |
| 140 202 | 
             
            # ...
         | 
| 141 203 | 
             
            ```
         | 
| 142 204 |  | 
| 205 | 
            +
            When calling `#shutdown`, the producer will attempt to deliver the messages and the method call will block until that has happened. Note that there's no _guarantee_ that the messages will be delivered.
         | 
| 206 | 
            +
             | 
| 143 207 | 
             
            **Note:** if the calling thread produces messages faster than the producer can write them to Kafka, you'll eventually run into problems. The internal queue used for sending messages from the calling thread to the background worker has a size limit; once this limit is reached, a call to `#produce` will raise `Kafka::BufferOverflow`.
         | 
| 144 208 |  | 
| 145 209 | 
             
            #### Serialization
         | 
| @@ -231,6 +295,28 @@ Note that there's a maximum buffer size; pass in a different value for `max_buff | |
| 231 295 |  | 
| 232 296 | 
             
            A final note on buffers: local buffers give resilience against broker and network failures, and allow higher throughput due to message batching, but they also trade off consistency guarantees for higher availability and resilience. If your local process dies while messages are buffered, those messages will be lost. If you require high levels of consistency, you should call `#deliver_messages` immediately after `#produce`.
         | 
| 233 297 |  | 
| 298 | 
            +
            #### Message Durability
         | 
| 299 | 
            +
             | 
| 300 | 
            +
            Once the client has delivered a set of messages to a Kafka broker, the broker will forward them to its replicas, thus ensuring that a single broker failure will not result in message loss. However, the client can choose _when the leader acknowledges the write_. At one extreme, the client can choose fire-and-forget delivery, not even bothering to check whether the messages have been acknowledged. At the other end, the client can ask the broker to wait until _all_ its replicas have acknowledged the write before returning. This is the safest option, and the default. It's also possible to have the broker return as soon as it has written the messages to its own log but before the replicas have done so. This leaves a window of time where a failure of the leader will result in the messages being lost, although this should not be a common occurrence.
         | 
| 301 | 
            +
             | 
| 302 | 
            +
            Write latency and throughput are negatively impacted by having more replicas acknowledge a write, so if you require low-latency, high-throughput writes you may want to accept lower durability.
         | 
| 303 | 
            +
             | 
| 304 | 
            +
            This behavior is controlled by the `required_acks` option to `#producer` and `#async_producer`:
         | 
| 305 | 
            +
             | 
| 306 | 
            +
            ```ruby
         | 
| 307 | 
            +
            # This is the default: all replicas must acknowledge.
         | 
| 308 | 
            +
            producer = kafka.producer(required_acks: :all)
         | 
| 309 | 
            +
             | 
| 310 | 
            +
            # This is fire-and-forget: messages can easily be lost.
         | 
| 311 | 
            +
            producer = kafka.producer(required_acks: 0)
         | 
| 312 | 
            +
             | 
| 313 | 
            +
            # This only waits for the leader to acknowledge.
         | 
| 314 | 
            +
            producer = kafka.producer(required_acks: 1)
         | 
| 315 | 
            +
            ```
         | 
| 316 | 
            +
             | 
| 317 | 
            +
            Unless you absolutely need lower latency it's highly recommended to use the default setting (`:all`).
         | 
| 318 | 
            +
             | 
| 319 | 
            +
             | 
| 234 320 | 
             
            #### Message Delivery Guarantees
         | 
| 235 321 |  | 
| 236 322 | 
             
            There are basically two different and incompatible guarantees that can be made in a message delivery system such as Kafka:
         | 
| @@ -369,7 +455,10 @@ While this is great for extremely simple use cases, there are a number of downsi | |
| 369 455 | 
             
            - If you want to have multiple processes consume from the same topic, there's no way of coordinating which processes should fetch from which partitions.
         | 
| 370 456 | 
             
            - If a process dies, there's no way to have another process resume fetching from the point in the partition that the original process had reached.
         | 
| 371 457 |  | 
| 372 | 
            -
             | 
| 458 | 
            +
             | 
| 459 | 
            +
            #### Consumer Groups
         | 
| 460 | 
            +
             | 
| 461 | 
            +
            The Consumer API solves all of the above issues, and more. It uses the Consumer Groups feature released in Kafka 0.9 to allow multiple consumer processes to coordinate access to a topic, assigning each partition to a single consumer. When a consumer fails, the partitions that were assigned to it are re-assigned to other members of the group.
         | 
| 373 462 |  | 
| 374 463 | 
             
            Using the API is simple:
         | 
| 375 464 |  | 
| @@ -415,6 +504,21 @@ consumer = kafka.consumer( | |
| 415 504 | 
             
            ```
         | 
| 416 505 |  | 
| 417 506 |  | 
| 507 | 
            +
            #### Topic Subscriptions
         | 
| 508 | 
            +
             | 
| 509 | 
            +
            For each topic subscription it's possible to decide whether to consume messages starting at the beginning of the topic or to just consume new messages that are produced to the topic. This policy is configured by setting the `start_from_beginning` argument when calling `#subscribe`:
         | 
| 510 | 
            +
             | 
| 511 | 
            +
            ```ruby
         | 
| 512 | 
            +
            # Consume messages from the very beginning of the topic. This is the default.
         | 
| 513 | 
            +
            consumer.subscribe("users", start_from_beginning: true)
         | 
| 514 | 
            +
             | 
| 515 | 
            +
            # Only consume new messages.
         | 
| 516 | 
            +
            consumer.subscribe("notifications", start_from_beginning: false)
         | 
| 517 | 
            +
            ```
         | 
| 518 | 
            +
             | 
| 519 | 
            +
            Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of `start_from_beginning`. As such, this setting only applies when the consumer initially starts consuming from a topic.
         | 
| 520 | 
            +
             | 
| 521 | 
            +
             | 
| 418 522 | 
             
            #### Consuming Messages in Batches
         | 
| 419 523 |  | 
| 420 524 | 
             
            Sometimes it is easier to deal with messages in batches rather than individually. A _batch_ is a sequence of one or more Kafka messages that all belong to the same topic and partition. One common reason to want to use batches is when some external system has a batch or transactional API.
         | 
| @@ -444,6 +548,32 @@ end | |
| 444 548 |  | 
| 445 549 | 
             
            One important thing to note is that the client commits the offset of the batch's messages only after the _entire_ batch has been processed.
         | 
| 446 550 |  | 
| 551 | 
            +
             | 
| 552 | 
            +
            #### Balancing Throughput and Latency
         | 
| 553 | 
            +
             | 
| 554 | 
            +
            There are two performance properties that can at times be at odds: _throughput_ and _latency_. Throughput is the number of messages that can be processed in a given timespan; latency is the time that elapses from when a message is written to a topic until it has been processed.
         | 
| 555 | 
            +
             | 
| 556 | 
            +
            In order to optimize for throughput, you want to make sure to fetch as many messages as possible every time you do a round trip to the Kafka cluster. This minimizes network overhead and allows processing data in big chunks.
         | 
| 557 | 
            +
             | 
| 558 | 
            +
            In order to optimize for low latency, you want to process a message as soon as possible, even if that means fetching a smaller batch of messages.
         | 
| 559 | 
            +
             | 
| 560 | 
            +
            There are two values that can be tuned in order to balance these two concerns: `min_bytes` and `max_wait_time`.
         | 
| 561 | 
            +
             | 
| 562 | 
            +
            * `min_bytes` is the minimum number of bytes to return from a single message fetch. By setting this to a high value you can increase the processing throughput. The default value is one byte.
         | 
| 563 | 
            +
            * `max_wait_time` is the maximum number of seconds to wait before returning data from a single message fetch. By setting this high you also increase the processing throughput – and by setting it low you set a bound on latency. This configuration overrides `min_bytes`, so you'll _always_ get data back within the time specified. The default value is five seconds.
         | 
| 564 | 
            +
             | 
| 565 | 
            +
            Both settings can be passed to either `#each_message` or `#each_batch`, e.g.
         | 
| 566 | 
            +
             | 
| 567 | 
            +
            ```ruby
         | 
| 568 | 
            +
            # Waits for data for up to 30 seconds, preferring to fetch at least 5KB at a time.
         | 
| 569 | 
            +
            consumer.each_message(min_bytes: 1024 * 5, max_wait_time: 30) do |message|
         | 
| 570 | 
            +
              # ...
         | 
| 571 | 
            +
            end
         | 
| 572 | 
            +
            ```
         | 
| 573 | 
            +
             | 
| 574 | 
            +
            If you want to have at most one second of latency, set `max_wait_time: 1`.
         | 
| 575 | 
            +
             | 
| 576 | 
            +
             | 
| 447 577 | 
             
            ### Thread Safety
         | 
| 448 578 |  | 
| 449 579 | 
             
            You typically don't want to share a Kafka client between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
         | 
    
        data/ci/consumer.rb
    ADDED
    
    | @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            # Consumes messages from a Kafka topic.
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative "init"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            consumer = $kafka.consumer(group_id: "greetings-group")
         | 
| 6 | 
            +
            consumer.subscribe("greetings")
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            num_messages = 0
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            trap("TERM") { consumer.stop }
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            consumer.each_message do |message|
         | 
| 13 | 
            +
              num_messages += 1
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              if num_messages % 1000 == 0
         | 
| 16 | 
            +
                puts "Processed #{num_messages} messages"
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
            end
         | 
    
        data/ci/init.rb
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            # Shared setup for the CI scripts: puts the local `lib` directory on the
            # load path and builds the global `$kafka` client from the Heroku Kafka
            # environment variables.
            $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))

            # Logger is used directly below, so require it explicitly rather than
            # relying on another library having loaded it.
            require "logger"
            require "kafka"

            logger = Logger.new(STDOUT)
            logger.level = Logger::INFO

            # Logger writes the formatter's return value verbatim, so the line
            # terminator has to be added here; otherwise consecutive log entries
            # run together on a single line.
            logger.formatter = -> (_, _, _, msg) { "#{msg}\n" }

            # Flush output immediately so log lines show up as they are written.
            STDOUT.sync = true

            # ENV.fetch raises KeyError when a variable is missing, which fails the
            # script early instead of building a half-configured client.
            $kafka = Kafka.new(
              logger: logger,
              seed_brokers: ENV.fetch("HEROKU_KAFKA_URL"),
              ssl_ca_cert: ENV.fetch("HEROKU_KAFKA_TRUSTED_CERT"),
              ssl_client_cert: ENV.fetch("HEROKU_KAFKA_CLIENT_CERT"),
              ssl_client_cert_key: ENV.fetch("HEROKU_KAFKA_CLIENT_CERT_KEY"),
            )
         | 
    
        data/ci/producer.rb
    ADDED
    
    | @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            # Continuously produces messages to a Kafka topic.
            #
            # Writes "hello"/"world" messages to the "greetings" topic through an
            # asynchronous producer until the process receives SIGTERM, then shuts
            # the producer down.

            require_relative "init"

            producer = $kafka.async_producer(
              delivery_interval: 1,
              max_queue_size: 5_000,
              max_buffer_size: 10_000,
            )

            shutdown = false

            # Only flip a flag in the signal handler; the loop below notices it on
            # its next iteration.
            trap("TERM") { shutdown = true }

            until shutdown
              begin
                producer.produce("hello", key: "world", topic: "greetings")
              rescue Kafka::BufferOverflow
                # The producer cannot accept more messages right now; wait before
                # trying again.
                puts "Buffer overflow, backing off..."
                sleep 10
              end
            end

            producer.shutdown
         | 
    
        data/lib/kafka/client.rb
    CHANGED
    
    | @@ -59,13 +59,64 @@ module Kafka | |
| 59 59 | 
             
                  @cluster = initialize_cluster
         | 
| 60 60 | 
             
                end
         | 
| 61 61 |  | 
| 62 | 
            +
                # Delivers a single message to the cluster with one produce
                # operation, bypassing the buffered producer.
                #
                # @param value [Object] the message value.
                # @param key [Object, nil] the message key.
                # @param topic [String] the topic to write to.
                # @param partition [Integer, nil] the partition to write to; when nil,
                #   the partitioner picks one from the topic's partitions.
                # @param partition_key [Object, nil] passed on to the pending message
                #   that the partitioner receives.
                # @raise [DeliveryFailed] if the message is still in the buffer after
                #   the produce operation has executed.
                # @return [nil]
                def deliver_message(value, key: nil, topic:, partition: nil, partition_key: nil)
                  now = Time.now
                  payload_bytesize = key.to_s.bytesize + value.to_s.bytesize

                  pending = PendingMessage.new(value, key, topic, partition, partition_key, now, payload_bytesize)

                  # Only consult the partitioner when the caller did not pin a partition.
                  if partition.nil?
                    partition = Partitioner.partition_for_key(@cluster.partitions_for(topic).count, pending)
                  end

                  buffer = MessageBuffer.new
                  buffer.write(
                    value: pending.value,
                    key: pending.key,
                    topic: pending.topic,
                    partition: partition,
                    create_time: pending.create_time,
                  )

                  @cluster.add_target_topics([topic])

                  compressor = Compressor.new(instrumenter: @instrumenter)

                  ProduceOperation.new(
                    cluster: @cluster,
                    buffer: buffer,
                    required_acks: 1,
                    ack_timeout: 10,
                    compressor: compressor,
                    logger: @logger,
                    instrumenter: @instrumenter,
                  ).execute

                  # Anything left in the buffer was not delivered.
                  raise DeliveryFailed unless buffer.empty?
                end
         | 
| 112 | 
            +
             | 
| 62 113 | 
             
                # Initializes a new Kafka producer.
         | 
| 63 114 | 
             
                #
         | 
| 64 115 | 
             
                # @param ack_timeout [Integer] The number of seconds a broker can wait for
         | 
| 65 116 | 
             
                #   replicas to acknowledge a write before responding with a timeout.
         | 
| 66 117 | 
             
                #
         | 
| 67 | 
            -
                # @param required_acks [Integer] The number of replicas that must acknowledge
         | 
| 68 | 
            -
                #   a write.
         | 
| 118 | 
            +
                # @param required_acks [Integer, Symbol] The number of replicas that must acknowledge
         | 
| 119 | 
            +
                #   a write, or `:all` if all in-sync replicas must acknowledge.
         | 
| 69 120 | 
             
                #
         | 
| 70 121 | 
             
                # @param max_retries [Integer] the number of retries that should be attempted
         | 
| 71 122 | 
             
                #   before giving up sending messages to the cluster. Does not include the
         | 
| @@ -89,7 +140,7 @@ module Kafka | |
| 89 140 | 
             
                #   are per-partition rather than per-topic or per-producer.
         | 
| 90 141 | 
             
                #
         | 
| 91 142 | 
             
                # @return [Kafka::Producer] the Kafka producer.
         | 
| 92 | 
            -
                def producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks:  | 
| 143 | 
            +
                def producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
         | 
| 93 144 | 
             
                  compressor = Compressor.new(
         | 
| 94 145 | 
             
                    codec_name: compression_codec,
         | 
| 95 146 | 
             
                    threshold: compression_threshold,
         | 
| @@ -152,6 +203,10 @@ module Kafka | |
| 152 203 | 
             
                def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10)
         | 
| 153 204 | 
             
                  cluster = initialize_cluster
         | 
| 154 205 |  | 
| 206 | 
            +
                  instrumenter = DecoratingInstrumenter.new(@instrumenter, {
         | 
| 207 | 
            +
                    group_id: group_id,
         | 
| 208 | 
            +
                  })
         | 
| 209 | 
            +
             | 
| 155 210 | 
             
                  group = ConsumerGroup.new(
         | 
| 156 211 | 
             
                    cluster: cluster,
         | 
| 157 212 | 
             
                    logger: @logger,
         | 
| @@ -174,7 +229,7 @@ module Kafka | |
| 174 229 | 
             
                  Consumer.new(
         | 
| 175 230 | 
             
                    cluster: cluster,
         | 
| 176 231 | 
             
                    logger: @logger,
         | 
| 177 | 
            -
                    instrumenter:  | 
| 232 | 
            +
                    instrumenter: instrumenter,
         | 
| 178 233 | 
             
                    group: group,
         | 
| 179 234 | 
             
                    offset_manager: offset_manager,
         | 
| 180 235 | 
             
                    session_timeout: session_timeout,
         | 
| @@ -256,6 +311,32 @@ module Kafka | |
| 256 311 | 
             
                  operation.execute.flat_map {|batch| batch.messages }
         | 
| 257 312 | 
             
                end
         | 
| 258 313 |  | 
| 314 | 
            +
                # EXPERIMENTAL: Enumerates all messages in a topic.
                #
                # Loops forever: each iteration fetches from every partition of the
                # topic and yields the fetched messages to the block one at a time.
                #
                # @param topic [String] the topic to read from.
                # @param offset [Symbol, Integer] where to start reading in a partition
                #   that has not yielded any messages yet.
                # @param max_wait_time [Integer] passed to the fetch operation.
                # @param min_bytes [Integer] passed to the fetch operation.
                # @param max_bytes [Integer] passed to each partition fetch.
                # @yield [message] each fetched message.
                # @return [nil] this method does not return normally.
                def each_message(topic:, offset: :earliest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
                  # Partitions without a recorded position fall back to the starting offset.
                  offsets = Hash.new { offset }

                  loop do
                    operation = FetchOperation.new(
                      cluster: @cluster,
                      logger: @logger,
                      min_bytes: min_bytes,
                      max_wait_time: max_wait_time,
                    )

                    @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
                      partition_offset = offsets[partition]
                      operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
                    end

                    batches = operation.execute

                    batches.each do |batch|
                      # Nothing to yield and no position to advance for an empty batch.
                      next if batch.messages.empty?

                      batch.messages.each(&block)

                      # Resume from the offset *after* the last yielded message;
                      # resuming from `last_offset` itself would fetch and yield that
                      # message again on every iteration.
                      # NOTE(review): assumes `last_offset` is the offset of the last
                      # message in the batch; confirm against Kafka::FetchedBatch.
                      offsets[batch.partition] = batch.last_offset + 1
                    end
                  end
                end
         | 
| 339 | 
            +
             | 
| 259 340 | 
             
                # Lists all topics in the cluster.
         | 
| 260 341 | 
             
                #
         | 
| 261 342 | 
             
                # @return [Array<String>] the list of topic names.
         | 
| @@ -324,12 +405,10 @@ module Kafka | |
| 324 405 | 
             
                  end
         | 
| 325 406 | 
             
                  brokers = []
         | 
| 326 407 | 
             
                  seed_brokers.each do |connection|
         | 
| 327 | 
            -
                     | 
| 328 | 
            -
             | 
| 329 | 
            -
             | 
| 330 | 
            -
                     | 
| 331 | 
            -
                      brokers << connection
         | 
| 332 | 
            -
                    end
         | 
| 408 | 
            +
                    connection.prepend("kafka://") unless connection =~ /:\/\//
         | 
| 409 | 
            +
                    uri = URI.parse(connection)
         | 
| 410 | 
            +
                    uri.port ||= 9092 # Default Kafka port.
         | 
| 411 | 
            +
                    brokers << uri
         | 
| 333 412 | 
             
                  end
         | 
| 334 413 | 
             
                  brokers
         | 
| 335 414 | 
             
                end
         | 
    
        data/lib/kafka/cluster.rb
    CHANGED
    
    | @@ -13,7 +13,7 @@ module Kafka | |
| 13 13 | 
             
                #
         | 
| 14 14 | 
             
                # The cluster will try to fetch cluster metadata from one of the brokers.
         | 
| 15 15 | 
             
                #
         | 
| 16 | 
            -
                # @param seed_brokers [Array< | 
| 16 | 
            +
                # @param seed_brokers [Array<URI>]
         | 
| 17 17 | 
             
                # @param broker_pool [Kafka::BrokerPool]
         | 
| 18 18 | 
             
                # @param logger [Logger]
         | 
| 19 19 | 
             
                def initialize(seed_brokers:, broker_pool:, logger:)
         | 
| @@ -136,10 +136,7 @@ module Kafka | |
| 136 136 | 
             
                    @logger.info "Fetching cluster metadata from #{node}"
         | 
| 137 137 |  | 
| 138 138 | 
             
                    begin
         | 
| 139 | 
            -
                       | 
| 140 | 
            -
                      port ||= 9092 # Default Kafka port.
         | 
| 141 | 
            -
             | 
| 142 | 
            -
                      broker = @broker_pool.connect(host, port.to_i)
         | 
| 139 | 
            +
                      broker = @broker_pool.connect(node.hostname, node.port)
         | 
| 143 140 | 
             
                      cluster_info = broker.fetch_metadata(topics: @target_topics)
         | 
| 144 141 |  | 
| 145 142 | 
             
                      @stale = false
         | 
    
        data/lib/kafka/compressor.rb
    CHANGED
    
    | @@ -20,7 +20,7 @@ module Kafka | |
| 20 20 | 
             
                # @param codec_name [Symbol, nil]
         | 
| 21 21 | 
             
                # @param threshold [Integer] the minimum number of messages in a message set
         | 
| 22 22 | 
             
                #   that will trigger compression.
         | 
| 23 | 
            -
                def initialize(codec_name | 
| 23 | 
            +
                def initialize(codec_name: nil, threshold: 1, instrumenter:)
         | 
| 24 24 | 
             
                  @codec = Compression.find_codec(codec_name)
         | 
| 25 25 | 
             
                  @threshold = threshold
         | 
| 26 26 | 
             
                  @instrumenter = instrumenter
         | 
    
        data/lib/kafka/consumer.rb
    CHANGED
    
    | @@ -65,9 +65,16 @@ module Kafka | |
| 65 65 | 
             
                #
         | 
| 66 66 | 
             
                # @param topic [String] the name of the topic to subscribe to.
         | 
| 67 67 | 
             
                # @param default_offset [Symbol] whether to start from the beginning or the
         | 
| 68 | 
            -
                #   end of the topic's partitions.
         | 
| 68 | 
            +
                #   end of the topic's partitions. Deprecated.
         | 
| 69 | 
            +
                # @param start_from_beginning [Boolean] whether to start from the beginning
         | 
| 70 | 
            +
                #   of the topic or just subscribe to new messages being produced. This
         | 
| 71 | 
            +
                #   only applies when first consuming a topic partition – once the consumer
         | 
| 72 | 
            +
                #   has checkpointed its progress, it will always resume from the last
         | 
| 73 | 
            +
                #   checkpoint.
         | 
| 69 74 | 
             
                # @return [nil]
         | 
| 70 | 
            -
                def subscribe(topic, default_offset: : | 
| 75 | 
            +
                def subscribe(topic, default_offset: nil, start_from_beginning: true)
         | 
| 76 | 
            +
                  default_offset ||= start_from_beginning ? :earliest : :latest
         | 
| 77 | 
            +
             | 
| 71 78 | 
             
                  @group.subscribe(topic)
         | 
| 72 79 | 
             
                  @offset_manager.set_default_offset(topic, default_offset)
         | 
| 73 80 |  | 
| @@ -154,6 +161,7 @@ module Kafka | |
| 154 161 | 
             
                          notification.update(
         | 
| 155 162 | 
             
                            topic: batch.topic,
         | 
| 156 163 | 
             
                            partition: batch.partition,
         | 
| 164 | 
            +
                            offset_lag: batch.offset_lag,
         | 
| 157 165 | 
             
                            highwater_mark_offset: batch.highwater_mark_offset,
         | 
| 158 166 | 
             
                            message_count: batch.messages.count,
         | 
| 159 167 | 
             
                          )
         | 
| @@ -0,0 +1,208 @@ | |
| 1 | 
            +
            begin
         | 
| 2 | 
            +
              require "statsd"
         | 
| 3 | 
            +
            rescue LoadError
         | 
| 4 | 
            +
              $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
         | 
| 5 | 
            +
              raise
         | 
| 6 | 
            +
            end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            require "active_support/subscriber"
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            module Kafka
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              # Reports operational metrics to a Datadog agent using the modified Statsd protocol.
         | 
| 13 | 
            +
              #
         | 
| 14 | 
            +
              #     require "kafka/datadog"
         | 
| 15 | 
            +
              #
         | 
| 16 | 
            +
              #     # Default is "ruby_kafka".
         | 
| 17 | 
            +
              #     Kafka::Datadog.namespace = "custom-namespace"
         | 
| 18 | 
            +
              #
         | 
| 19 | 
            +
              #     # Default is "127.0.0.1".
         | 
| 20 | 
            +
              #     Kafka::Datadog.host = "statsd.something.com"
         | 
| 21 | 
            +
              #
         | 
| 22 | 
            +
              #     # Default is 8125.
         | 
| 23 | 
            +
              #     Kafka::Datadog.port = 1234
         | 
| 24 | 
            +
              #
         | 
| 25 | 
            +
              # Once the file has been required, no further configuration is needed – all operational
         | 
| 26 | 
            +
              # metrics are automatically emitted.
         | 
| 27 | 
            +
              module Datadog
         | 
| 28 | 
            +
                STATSD_NAMESPACE = "ruby_kafka"
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                # Returns the Statsd client used to emit metrics, creating it on
                # first use with the library's default host and port and the
                # `STATSD_NAMESPACE` namespace.
                #
                # @return [Statsd]
                def self.statsd
                  # The second positional argument is the port; the host constant
                  # was previously passed here by mistake.
                  @statsd ||= Statsd.new(Statsd::DEFAULT_HOST, Statsd::DEFAULT_PORT, namespace: STATSD_NAMESPACE)
                end

                # Sets the host the Statsd client sends metrics to.
                #
                # @param host [String]
                def self.host=(host)
                  statsd.host = host
                end

                # Sets the port the Statsd client sends metrics to.
                #
                # @param port [Integer]
                def self.port=(port)
                  statsd.port = port
                end

                # Sets the namespace of the Statsd client.
                #
                # @param namespace [String]
                def self.namespace=(namespace)
                  statsd.namespace = namespace
                end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                # Base class for the subscribers below. Provides private helpers,
                # one per metric type, that forward to the shared Statsd client.
                class StatsdSubscriber < ActiveSupport::Subscriber
                  private

                  # Defines #increment, #histogram, #count, #timing and #gauge, each
                  # delegating to #emit with its own name as the metric type.
                  %w[increment histogram count timing gauge].each do |type|
                    define_method(type) { |*args| emit(type, *args) }
                  end

                  # Sends a metric of the given type to the Statsd client, turning
                  # the tags hash into an array of "key:value" strings.
                  def emit(type, *args, tags: {})
                    formatted_tags = tags.map {|name, value| "#{name}:#{value}" }

                    Kafka::Datadog.statsd.send(type, *args, tags: formatted_tags)
                  end
                end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                # Emits per-request metrics for events in the "connection.kafka"
                # namespace.
                class ConnectionSubscriber < StatsdSubscriber
                  # Reports latency, call count, request/response sizes and, when the
                  # payload carries an exception, an error count for one API request.
                  def request(event)
                    payload = event.payload

                    tags = {
                      client: payload.fetch(:client_id),
                      api: payload.fetch(:api, "unknown"),
                      broker: payload.fetch(:broker_host),
                    }

                    timing("api.latency", event.duration, tags: tags)
                    increment("api.calls", tags: tags)

                    # Sizes default to zero when the payload does not carry them.
                    histogram("api.request_size", payload.fetch(:request_size, 0), tags: tags)
                    histogram("api.response_size", payload.fetch(:response_size, 0), tags: tags)

                    increment("api.errors", tags: tags) if payload.key?(:exception)
                  end

                  attach_to "connection.kafka"
                end
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                # Emits consumer metrics for events in the "consumer.kafka" namespace.
                class ConsumerSubscriber < StatsdSubscriber
                  # Reports the outcome of processing a single message, followed by
                  # the offset lag taken from the event payload.
                  def process_message(event)
                    payload = event.payload
                    lag = payload.fetch(:offset_lag)

                    tags = {
                      client: payload.fetch(:client_id),
                      topic: payload.fetch(:topic),
                      partition: payload.fetch(:partition),
                    }

                    if payload.key?(:exception)
                      increment("consumer.process_message.errors", tags: tags)
                    else
                      timing("consumer.process_message.latency", event.duration, tags: tags)
                      increment("consumer.messages", tags: tags)
                    end

                    # The lag gauge is emitted whether or not processing failed.
                    gauge("consumer.lag", lag, tags: tags)
                  end

                  # Reports the outcome of processing a batch; on success the
                  # message counter is increased by the batch's message count.
                  def process_batch(event)
                    payload = event.payload
                    batch_size = payload.fetch(:message_count)

                    tags = {
                      client: payload.fetch(:client_id),
                      topic: payload.fetch(:topic),
                      partition: payload.fetch(:partition),
                    }

                    if payload.key?(:exception)
                      increment("consumer.process_batch.errors", tags: tags)
                    else
                      timing("consumer.process_batch.latency", event.duration, tags: tags)
                      count("consumer.messages", batch_size, tags: tags)
                    end
                  end

                  attach_to "consumer.kafka"
                end
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                # Emits producer metrics for events in the "producer.kafka" namespace.
                class ProducerSubscriber < StatsdSubscriber
                  # Reports a produced message together with the current buffer size
                  # and how full the buffer is relative to its maximum size.
                  def produce_message(event)
                    payload = event.payload

                    client = payload.fetch(:client_id)
                    topic = payload.fetch(:topic)
                    buffer_size = payload.fetch(:buffer_size)
                    buffer_capacity = payload.fetch(:max_buffer_size)
                    fill_ratio = buffer_size.to_f / buffer_capacity.to_f

                    client_tags = { client: client }

                    # This gets us the write rate.
                    increment("producer.produce.messages", tags: client_tags.merge(topic: topic))

                    # This gets us the avg/max buffer size per producer.
                    histogram("producer.buffer.size", buffer_size, tags: client_tags)

                    # This gets us the avg/max buffer fill ratio per producer.
                    histogram("producer.buffer.fill_ratio", fill_ratio, tags: client_tags)
                  end

                  # Counts a produce error for the topic named in the payload.
                  def buffer_overflow(event)
                    payload = event.payload
                    tags = { client: payload.fetch(:client_id), topic: payload.fetch(:topic) }

                    increment("producer.produce.errors", tags: tags)
                  end

                  # Reports delivery latency, the delivered message count and the
                  # number of attempts, plus an error count when the payload carries
                  # an exception.
                  def deliver_messages(event)
                    payload = event.payload

                    client = payload.fetch(:client_id)
                    delivered = payload.fetch(:delivered_message_count)
                    attempts = payload.fetch(:attempts)

                    tags = { client: client }

                    increment("producer.deliver.errors", tags: tags) if payload.key?(:exception)

                    timing("producer.deliver.latency", event.duration, tags: tags)

                    # Messages delivered to Kafka:
                    count("producer.deliver.messages", delivered, tags: tags)

                    # Number of attempts to deliver messages:
                    histogram("producer.deliver.attempts", attempts, tags: tags)
                  end

                  # Reports an acknowledged message and the delay from the payload.
                  def ack_message(event)
                    payload = event.payload
                    tags = { client: payload.fetch(:client_id), topic: payload.fetch(:topic) }

                    # Number of messages ACK'd for the topic.
                    increment("producer.ack.messages", tags: tags)

                    # Histogram of delay between a message being produced and it being ACK'd.
                    histogram("producer.ack.delay", payload.fetch(:delay), tags: tags)
                  end

                  # Counts an acknowledgement error for the topic named in the payload.
                  def topic_error(event)
                    payload = event.payload
                    tags = { client: payload.fetch(:client_id), topic: payload.fetch(:topic) }

                    increment("producer.ack.errors", tags: tags)
                  end

                  attach_to "producer.kafka"
                end
         | 
| 207 | 
            +
              end
         | 
| 208 | 
            +
            end
         | 
    
        data/lib/kafka/fetched_batch.rb
    CHANGED
    
    
    
        data/lib/kafka/instrumenter.rb
    CHANGED
    
    | @@ -22,4 +22,15 @@ module Kafka | |
| 22 22 | 
             
                  end
         | 
| 23 23 | 
             
                end
         | 
| 24 24 | 
             
              end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              class DecoratingInstrumenter
         | 
| 27 | 
            +
                def initialize(backend, extra_payload = {})
         | 
| 28 | 
            +
                  @backend = backend
         | 
| 29 | 
            +
                  @extra_payload = extra_payload
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                def instrument(event_name, payload = {}, &block)
         | 
| 33 | 
            +
                  @backend.instrument(event_name, @extra_payload.merge(payload), &block)
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
              end
         | 
| 25 36 | 
             
            end
         | 
| @@ -1,18 +1,11 @@ | |
| 1 1 | 
             
            module Kafka
         | 
| 2 | 
            -
               | 
| 3 | 
            -
                 | 
| 4 | 
            -
             | 
| 5 | 
            -
                 | 
| 6 | 
            -
             | 
| 7 | 
            -
                 | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
                  @partition = partition
         | 
| 12 | 
            -
                  @partition_key = partition_key
         | 
| 13 | 
            -
                  @create_time = create_time
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                  @bytesize = key.to_s.bytesize + value.to_s.bytesize
         | 
| 16 | 
            -
                end
         | 
| 17 | 
            -
              end
         | 
| 2 | 
            +
              PendingMessage = Struct.new(
         | 
| 3 | 
            +
                "PendingMessage",
         | 
| 4 | 
            +
                :value,
         | 
| 5 | 
            +
                :key,
         | 
| 6 | 
            +
                :topic,
         | 
| 7 | 
            +
                :partition,
         | 
| 8 | 
            +
                :partition_key,
         | 
| 9 | 
            +
                :create_time,
         | 
| 10 | 
            +
                :bytesize)
         | 
| 18 11 | 
             
            end
         | 
| @@ -138,6 +138,7 @@ module Kafka | |
| 138 138 | 
             
                      @logger.error "Corrupt message when writing to #{topic}/#{partition}"
         | 
| 139 139 | 
             
                    rescue Kafka::UnknownTopicOrPartition
         | 
| 140 140 | 
             
                      @logger.error "Unknown topic or partition #{topic}/#{partition}"
         | 
| 141 | 
            +
                      @cluster.mark_as_stale!
         | 
| 141 142 | 
             
                    rescue Kafka::LeaderNotAvailable
         | 
| 142 143 | 
             
                      @logger.error "Leader currently not available for #{topic}/#{partition}"
         | 
| 143 144 | 
             
                      @cluster.mark_as_stale!
         | 
    
        data/lib/kafka/producer.rb
    CHANGED
    
    | @@ -134,7 +134,7 @@ module Kafka | |
| 134 134 | 
             
                  @cluster = cluster
         | 
| 135 135 | 
             
                  @logger = logger
         | 
| 136 136 | 
             
                  @instrumenter = instrumenter
         | 
| 137 | 
            -
                  @required_acks = required_acks
         | 
| 137 | 
            +
                  @required_acks = required_acks == :all ? -1 : required_acks
         | 
| 138 138 | 
             
                  @ack_timeout = ack_timeout
         | 
| 139 139 | 
             
                  @max_retries = max_retries
         | 
| 140 140 | 
             
                  @retry_backoff = retry_backoff
         | 
| @@ -185,12 +185,13 @@ module Kafka | |
| 185 185 | 
             
                  create_time = Time.now
         | 
| 186 186 |  | 
| 187 187 | 
             
                  message = PendingMessage.new(
         | 
| 188 | 
            -
                    value | 
| 189 | 
            -
                    key | 
| 190 | 
            -
                    topic | 
| 191 | 
            -
                    partition | 
| 192 | 
            -
                    partition_key | 
| 193 | 
            -
                    create_time | 
| 188 | 
            +
                    value,
         | 
| 189 | 
            +
                    key,
         | 
| 190 | 
            +
                    topic,
         | 
| 191 | 
            +
                    partition,
         | 
| 192 | 
            +
                    partition_key,
         | 
| 193 | 
            +
                    create_time,
         | 
| 194 | 
            +
                    key.to_s.bytesize + value.to_s.bytesize
         | 
| 194 195 | 
             
                  )
         | 
| 195 196 |  | 
| 196 197 | 
             
                  if buffer_size >= @max_buffer_size
         | 
    
        data/lib/kafka/version.rb
    CHANGED
    
    
| @@ -0,0 +1,39 @@ | |
| 1 | 
            +
            $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
         | 
| 2 | 
            +
            $LOAD_PATH.unshift(File.expand_path("../../spec", __FILE__))
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require "kafka"
         | 
| 5 | 
            +
            require "ruby-prof"
         | 
| 6 | 
            +
            require "dotenv"
         | 
| 7 | 
            +
            require "test_cluster"
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            Dotenv.load
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Number of times to iterate.
         | 
| 12 | 
            +
            N = 10_000
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            KAFKA_CLUSTER = TestCluster.new
         | 
| 15 | 
            +
            KAFKA_CLUSTER.start
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            logger = Logger.new(nil)
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            kafka = Kafka.new(
         | 
| 20 | 
            +
              seed_brokers: KAFKA_CLUSTER.kafka_hosts,
         | 
| 21 | 
            +
              client_id: "test",
         | 
| 22 | 
            +
              logger: logger,
         | 
| 23 | 
            +
            )
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            producer = kafka.producer(
         | 
| 26 | 
            +
              max_buffer_size: 100_000,
         | 
| 27 | 
            +
            )
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            RubyProf.start
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            N.times do
         | 
| 32 | 
            +
              producer.produce("hello", topic: "greetings")
         | 
| 33 | 
            +
            end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            result = RubyProf.stop
         | 
| 36 | 
            +
            printer = RubyProf::FlatPrinter.new(result)
         | 
| 37 | 
            +
            printer.print(STDOUT)
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            KAFKA_CLUSTER.stop
         | 
    
        data/ruby-kafka.gemspec
    CHANGED
    
    | @@ -34,8 +34,10 @@ Gem::Specification.new do |spec| | |
| 34 34 | 
             
              spec.add_development_dependency "dotenv"
         | 
| 35 35 | 
             
              spec.add_development_dependency "docker-api"
         | 
| 36 36 | 
             
              spec.add_development_dependency "rspec-benchmark"
         | 
| 37 | 
            -
              spec.add_development_dependency "activesupport" | 
| 37 | 
            +
              spec.add_development_dependency "activesupport"
         | 
| 38 38 | 
             
              spec.add_development_dependency "snappy"
         | 
| 39 39 | 
             
              spec.add_development_dependency "colored"
         | 
| 40 40 | 
             
              spec.add_development_dependency "rspec_junit_formatter", "0.2.2"
         | 
| 41 | 
            +
              spec.add_development_dependency "dogstatsd-ruby"
         | 
| 42 | 
            +
              spec.add_development_dependency "ruby-prof"
         | 
| 41 43 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: ruby-kafka
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3.8 | 
| 4 | 
            +
              version: 0.3.9
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Daniel Schierbeck
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2016- | 
| 11 | 
            +
            date: 2016-06-16 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -114,20 +114,14 @@ dependencies: | |
| 114 114 | 
             
                requirements:
         | 
| 115 115 | 
             
                - - ">="
         | 
| 116 116 | 
             
                  - !ruby/object:Gem::Version
         | 
| 117 | 
            -
                    version:  | 
| 118 | 
            -
                - - "<"
         | 
| 119 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 120 | 
            -
                    version: '5.1'
         | 
| 117 | 
            +
                    version: '0'
         | 
| 121 118 | 
             
              type: :development
         | 
| 122 119 | 
             
              prerelease: false
         | 
| 123 120 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 124 121 | 
             
                requirements:
         | 
| 125 122 | 
             
                - - ">="
         | 
| 126 123 | 
             
                  - !ruby/object:Gem::Version
         | 
| 127 | 
            -
                    version:  | 
| 128 | 
            -
                - - "<"
         | 
| 129 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 130 | 
            -
                    version: '5.1'
         | 
| 124 | 
            +
                    version: '0'
         | 
| 131 125 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 132 126 | 
             
              name: snappy
         | 
| 133 127 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -170,6 +164,34 @@ dependencies: | |
| 170 164 | 
             
                - - '='
         | 
| 171 165 | 
             
                  - !ruby/object:Gem::Version
         | 
| 172 166 | 
             
                    version: 0.2.2
         | 
| 167 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 168 | 
            +
              name: dogstatsd-ruby
         | 
| 169 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 170 | 
            +
                requirements:
         | 
| 171 | 
            +
                - - ">="
         | 
| 172 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 173 | 
            +
                    version: '0'
         | 
| 174 | 
            +
              type: :development
         | 
| 175 | 
            +
              prerelease: false
         | 
| 176 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 177 | 
            +
                requirements:
         | 
| 178 | 
            +
                - - ">="
         | 
| 179 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 180 | 
            +
                    version: '0'
         | 
| 181 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 182 | 
            +
              name: ruby-prof
         | 
| 183 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 184 | 
            +
                requirements:
         | 
| 185 | 
            +
                - - ">="
         | 
| 186 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 187 | 
            +
                    version: '0'
         | 
| 188 | 
            +
              type: :development
         | 
| 189 | 
            +
              prerelease: false
         | 
| 190 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 191 | 
            +
                requirements:
         | 
| 192 | 
            +
                - - ">="
         | 
| 193 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 194 | 
            +
                    version: '0'
         | 
| 173 195 | 
             
            description: |-
         | 
| 174 196 | 
             
              A client library for the Kafka distributed commit log.
         | 
| 175 197 |  | 
| @@ -185,11 +207,16 @@ files: | |
| 185 207 | 
             
            - ".yardopts"
         | 
| 186 208 | 
             
            - CHANGELOG.md
         | 
| 187 209 | 
             
            - Gemfile
         | 
| 210 | 
            +
            - Gemfile.lock
         | 
| 188 211 | 
             
            - LICENSE.txt
         | 
| 212 | 
            +
            - Procfile
         | 
| 189 213 | 
             
            - README.md
         | 
| 190 214 | 
             
            - Rakefile
         | 
| 191 215 | 
             
            - bin/console
         | 
| 192 216 | 
             
            - bin/setup
         | 
| 217 | 
            +
            - ci/consumer.rb
         | 
| 218 | 
            +
            - ci/init.rb
         | 
| 219 | 
            +
            - ci/producer.rb
         | 
| 193 220 | 
             
            - circle.yml
         | 
| 194 221 | 
             
            - examples/firehose-consumer.rb
         | 
| 195 222 | 
             
            - examples/firehose-producer.rb
         | 
| @@ -208,6 +235,7 @@ files: | |
| 208 235 | 
             
            - lib/kafka/connection_builder.rb
         | 
| 209 236 | 
             
            - lib/kafka/consumer.rb
         | 
| 210 237 | 
             
            - lib/kafka/consumer_group.rb
         | 
| 238 | 
            +
            - lib/kafka/datadog.rb
         | 
| 211 239 | 
             
            - lib/kafka/fetch_operation.rb
         | 
| 212 240 | 
             
            - lib/kafka/fetched_batch.rb
         | 
| 213 241 | 
             
            - lib/kafka/fetched_message.rb
         | 
| @@ -257,6 +285,7 @@ files: | |
| 257 285 | 
             
            - lib/kafka/ssl_socket_with_timeout.rb
         | 
| 258 286 | 
             
            - lib/kafka/version.rb
         | 
| 259 287 | 
             
            - lib/ruby-kafka.rb
         | 
| 288 | 
            +
            - performance/profile.rb
         | 
| 260 289 | 
             
            - ruby-kafka.gemspec
         | 
| 261 290 | 
             
            homepage: https://github.com/zendesk/ruby-kafka
         | 
| 262 291 | 
             
            licenses:
         |