ruby-kafka 0.3.17 → 0.3.18.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +6 -2
- data/README.md +76 -14
- data/lib/kafka.rb +72 -0
- data/lib/kafka/broker.rb +22 -0
- data/lib/kafka/client.rb +29 -1
- data/lib/kafka/cluster.rb +4 -1
- data/lib/kafka/connection.rb +114 -23
- data/lib/kafka/connection_builder.rb +30 -3
- data/lib/kafka/consumer.rb +35 -10
- data/lib/kafka/datadog.rb +1 -1
- data/lib/kafka/fetch_operation.rb +14 -6
- data/lib/kafka/instrumenter.rb +28 -4
- data/lib/kafka/protocol.rb +27 -9
- data/lib/kafka/protocol/null_response.rb +11 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +31 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +26 -0
- data/lib/kafka/sasl_gssapi_authenticator.rb +69 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -0
- data/vendor/bundle/bin/bundler +17 -0
- data/vendor/bundle/bin/coderay +17 -0
- data/vendor/bundle/bin/dotenv +17 -0
- data/vendor/bundle/bin/htmldiff +17 -0
- data/vendor/bundle/bin/ldiff +17 -0
- data/vendor/bundle/bin/pry +17 -0
- data/vendor/bundle/bin/rake +17 -0
- data/vendor/bundle/bin/rspec +17 -0
- data/vendor/bundle/bin/ruby-prof +17 -0
- data/vendor/bundle/bin/ruby-prof-check-trace +17 -0
- metadata +32 -4
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 80c7dd382316d8fd5465ab7f73a4393cf3ed01cf
         | 
| 4 | 
            +
              data.tar.gz: b30842098f9076127c33eb762dffc71424de2866
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 90b3ed06f47b6e2b22a7623bcf3825206b7745170207cb9e2226adcc2ee60988769e7ad498a241e30eb0f04dbf3daa56acef4c91c5334b04ded7d1cb429e4866
         | 
| 7 | 
            +
              data.tar.gz: 72c20e866cd03e575bbae4d82a86fe56195463412929dbb4656c1cfc16ef4c9f109317897ad7773cc79ccd22da395c440522b174e786f2ff358629c4db897dc8
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,7 +1,8 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                ruby-kafka (0.3. | 
| 4 | 
            +
                ruby-kafka (0.3.18.beta1)
         | 
| 5 | 
            +
                  gssapi (>= 1.2.0)
         | 
| 5 6 |  | 
| 6 7 | 
             
            GEM
         | 
| 7 8 | 
             
              remote: https://rubygems.org/
         | 
| @@ -23,6 +24,9 @@ GEM | |
| 23 24 | 
             
                dogstatsd-ruby (2.1.0)
         | 
| 24 25 | 
             
                dotenv (2.1.0)
         | 
| 25 26 | 
             
                excon (0.54.0)
         | 
| 27 | 
            +
                ffi (1.9.18)
         | 
| 28 | 
            +
                gssapi (1.2.0)
         | 
| 29 | 
            +
                  ffi (>= 1.0.1)
         | 
| 26 30 | 
             
                i18n (0.7.0)
         | 
| 27 31 | 
             
                json (1.8.3)
         | 
| 28 32 | 
             
                method_source (0.8.2)
         | 
| @@ -83,4 +87,4 @@ RUBY VERSION | |
| 83 87 | 
             
               ruby 2.2.3p173
         | 
| 84 88 |  | 
| 85 89 | 
             
            BUNDLED WITH
         | 
| 86 | 
            -
               1. | 
| 90 | 
            +
               1.14.6
         | 
    
        data/README.md
    CHANGED
    
    | @@ -33,7 +33,9 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce | |
| 33 33 | 
             
                7. [Monitoring](#monitoring)
         | 
| 34 34 | 
             
                    1. [Reporting Metrics to Datadog](#reporting-metrics-to-datadog)
         | 
| 35 35 | 
             
                8. [Understanding Timeouts](#understanding-timeouts)
         | 
| 36 | 
            -
                9. [ | 
| 36 | 
            +
                9. [Security](#security)
         | 
| 37 | 
            +
                    1. [Encryption and Authentication using SSL](#encryption-and-authentication-using-ssl)
         | 
| 38 | 
            +
                    2. [Authentication using SASL](#authentication-using-sasl)
         | 
| 37 39 | 
             
            4. [Design](#design)
         | 
| 38 40 | 
             
                1. [Producer Design](#producer-design)
         | 
| 39 41 | 
             
                2. [Asynchronous Producer Design](#asynchronous-producer-design)
         | 
| @@ -101,7 +103,7 @@ require "kafka" | |
| 101 103 | 
             
            kafka = Kafka.new(
         | 
| 102 104 | 
             
              # At least one of these nodes must be available:
         | 
| 103 105 | 
             
              seed_brokers: ["kafka1:9092", "kafka2:9092"],
         | 
| 104 | 
            -
             | 
| 106 | 
            +
             | 
| 105 107 | 
             
              # Set an optional client id in order to identify the client to Kafka:
         | 
| 106 108 | 
             
              client_id: "my-application",
         | 
| 107 109 | 
             
            )
         | 
| @@ -198,7 +200,7 @@ These policies can be used alone or in combination. | |
| 198 200 | 
             
            producer = kafka.async_producer(
         | 
| 199 201 | 
             
              # Trigger a delivery once 100 messages have been buffered.
         | 
| 200 202 | 
             
              delivery_threshold: 100,
         | 
| 201 | 
            -
             | 
| 203 | 
            +
             | 
| 202 204 | 
             
              # Trigger a delivery every 30 seconds.
         | 
| 203 205 | 
             
              delivery_interval: 30,
         | 
| 204 206 | 
             
            )
         | 
| @@ -435,13 +437,13 @@ In your controllers, simply call the producer directly: | |
| 435 437 | 
             
            class OrdersController
         | 
| 436 438 | 
             
              def create
         | 
| 437 439 | 
             
                @order = Order.create!(params[:order])
         | 
| 438 | 
            -
             | 
| 440 | 
            +
             | 
| 439 441 | 
             
                event = {
         | 
| 440 442 | 
             
                  order_id: @order.id,
         | 
| 441 443 | 
             
                  amount: @order.amount,
         | 
| 442 444 | 
             
                  timestamp: Time.now,
         | 
| 443 445 | 
             
                }
         | 
| 444 | 
            -
             | 
| 446 | 
            +
             | 
| 445 447 | 
             
                $kafka_producer.produce(event.to_json, topic: "order_events")
         | 
| 446 448 | 
             
              end
         | 
| 447 449 | 
             
            end
         | 
| @@ -449,8 +451,6 @@ end | |
| 449 451 |  | 
| 450 452 | 
             
            ### Consuming Messages from Kafka
         | 
| 451 453 |  | 
| 452 | 
            -
            **Warning:** The Consumer API is still alpha level and will likely change. The consumer code should not be considered stable, as it hasn't been exhaustively tested in production environments yet.
         | 
| 453 | 
            -
             | 
| 454 454 | 
             
            Consuming messages from a Kafka topic is simple:
         | 
| 455 455 |  | 
| 456 456 | 
             
            ```ruby
         | 
| @@ -501,9 +501,11 @@ Each consumer process will be assigned one or more partitions from each topic th | |
| 501 501 |  | 
| 502 502 | 
             
            In order to be able to resume processing after a consumer crashes, each consumer will periodically _checkpoint_ its position within each partition it reads from. Since each partition has a monotonically increasing sequence of message offsets, this works by _committing_ the offset of the last message that was processed in a given partition. Kafka handles these commits and allows another consumer in a group to resume from the last commit when a member crashes or becomes unresponsive.
         | 
| 503 503 |  | 
| 504 | 
            -
            By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency.
         | 
| 504 | 
            +
            By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency. Set the commit interval to zero in order to disable the timer-based commit trigger entirely.
         | 
| 505 | 
            +
             | 
| 506 | 
            +
            In addition to the time-based trigger, it's possible to trigger checkpointing in response to _n_ messages having been processed, known as the _offset commit threshold_. This puts a bound on the number of messages that can be double-processed before the problem is detected. Setting this to 1 will cause an offset commit to take place every time a message has been processed. By default this trigger is disabled (set to zero).
         | 
| 505 507 |  | 
| 506 | 
            -
             | 
| 508 | 
            +
            It is possible to trigger an immediate offset commit by calling `Consumer#commit_offsets`. This blocks the caller until the Kafka cluster has acknowledged the commit.
         | 
| 507 509 |  | 
| 508 510 | 
             
            Stale offsets are periodically purged by the broker. The broker setting `offsets.retention.minutes` controls the retention window for committed offsets, and defaults to 1 day. The length of the retention window, known as _offset retention time_, can be changed for the consumer.
         | 
| 509 511 |  | 
| @@ -524,6 +526,46 @@ consumer = kafka.consumer( | |
| 524 526 | 
             
            )
         | 
| 525 527 | 
             
            ```
         | 
| 526 528 |  | 
| 529 | 
            +
            For some use cases it may be necessary to control when messages are marked as processed. Note that since only the consumer position within each partition can be saved, marking a message as processed implies that all messages in the partition with a lower offset should also be considered as having been processed.
         | 
| 530 | 
            +
             | 
| 531 | 
            +
            The method `Consumer#mark_message_as_processed` marks a message (and all those that precede it in a partition) as having been processed. This is an advanced API that you should only use if you know what you're doing.
         | 
| 532 | 
            +
             | 
| 533 | 
            +
            ```ruby
         | 
| 534 | 
            +
            # Manually controlling checkpointing:
         | 
| 535 | 
            +
             | 
| 536 | 
            +
            # Typically you want to use this API in order to buffer messages until some
         | 
| 537 | 
            +
            # special "commit" message is received, e.g. in order to group together
         | 
| 538 | 
            +
            # transactions consisting of several items.
         | 
| 539 | 
            +
            buffer = []
         | 
| 540 | 
            +
             | 
| 541 | 
            +
            # Messages will not be marked as processed automatically. If you shut down the
         | 
| 542 | 
            +
            # consumer without calling `#mark_message_as_processed` first, the consumer will
         | 
| 543 | 
            +
            # not resume where you left off!
         | 
| 544 | 
            +
            consumer.each_message(automatically_mark_as_processed: false) do |message|
         | 
| 545 | 
            +
              # Our messages are JSON with a `type` field and other stuff.
         | 
| 546 | 
            +
              event = JSON.parse(message.value)
         | 
| 547 | 
            +
             | 
| 548 | 
            +
              case event.fetch("type")
         | 
| 549 | 
            +
              when "add_to_cart"
         | 
| 550 | 
            +
                buffer << event
         | 
| 551 | 
            +
              when "complete_purchase"
         | 
| 552 | 
            +
                # We've received all the messages we need, time to save the transaction.
         | 
| 553 | 
            +
                save_transaction(buffer)
         | 
| 554 | 
            +
             | 
| 555 | 
            +
                # Now we can set the checkpoint by marking the last message as processed.
         | 
| 556 | 
            +
                consumer.mark_message_as_processed(message)
         | 
| 557 | 
            +
             | 
| 558 | 
            +
                # We can optionally trigger an immediate, blocking offset commit in order
         | 
| 559 | 
            +
                # to minimize the risk of crashing before the automatic triggers have
         | 
| 560 | 
            +
                # kicked in.
         | 
| 561 | 
            +
                consumer.commit_offsets
         | 
| 562 | 
            +
             | 
| 563 | 
            +
                # Make the buffer ready for the next transaction.
         | 
| 564 | 
            +
                buffer.clear
         | 
| 565 | 
            +
              end
         | 
| 566 | 
            +
            end
         | 
| 567 | 
            +
            ```
         | 
| 568 | 
            +
             | 
| 527 569 |  | 
| 528 570 | 
             
            #### Topic Subscriptions
         | 
| 529 571 |  | 
| @@ -614,9 +656,9 @@ If you want to have at most one second of latency, set `max_wait_time: 1`. | |
| 614 656 |  | 
| 615 657 | 
             
            ### Thread Safety
         | 
| 616 658 |  | 
| 617 | 
            -
            You typically don't want to share a Kafka client between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
         | 
| 659 | 
            +
            You typically don't want to share a Kafka client object between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
         | 
| 618 660 |  | 
| 619 | 
            -
            You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment.
         | 
| 661 | 
            +
            You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment. This is because producers, when instantiated, get their own copy of any non-thread-safe data such as network sockets. Furthermore, the asynchronous producer has been designed in such a way that only a single background thread operates on this data, while any foreground thread with a reference to the producer object can only send messages to that background thread over a safe queue. Therefore it is safe to share an async producer object between many threads.
         | 
| 620 662 |  | 
| 621 663 | 
             
            ### Logging
         | 
| 622 664 |  | 
| @@ -721,11 +763,13 @@ When sending many messages, it's likely that the client needs to send some messa | |
| 721 763 |  | 
| 722 764 | 
             
            Make sure your application can survive being blocked for so long.
         | 
| 723 765 |  | 
| 724 | 
            -
            ###  | 
| 766 | 
            +
            ### Security
         | 
| 767 | 
            +
             | 
| 768 | 
            +
            #### Encryption and Authentication using SSL
         | 
| 725 769 |  | 
| 726 770 | 
             
            By default, communication between Kafka clients and brokers is unencrypted and unauthenticated. Kafka 0.9 added optional support for [encryption and client authentication and authorization](http://kafka.apache.org/documentation.html#security_ssl). There are two layers of security made possible by this:
         | 
| 727 771 |  | 
| 728 | 
            -
             | 
| 772 | 
            +
            ##### Encryption of Communication
         | 
| 729 773 |  | 
| 730 774 | 
             
            By enabling SSL encryption you can have some confidence that messages can be sent to Kafka over an untrusted network without being intercepted.
         | 
| 731 775 |  | 
| @@ -740,7 +784,7 @@ kafka = Kafka.new( | |
| 740 784 |  | 
| 741 785 | 
             
            Without passing the CA certificate to the client it would be impossible to protect against [man-in-the-middle attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack).
         | 
| 742 786 |  | 
| 743 | 
            -
             | 
| 787 | 
            +
            ##### Client Authentication
         | 
| 744 788 |  | 
| 745 789 | 
             
            In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.
         | 
| 746 790 |  | 
| @@ -755,6 +799,24 @@ kafka = Kafka.new( | |
| 755 799 |  | 
| 756 800 | 
             
            Once client authentication is set up, it is possible to configure the Kafka cluster to [authorize client requests](http://kafka.apache.org/documentation.html#security_authz).
         | 
| 757 801 |  | 
| 802 | 
            +
            ##### Using JKS Certificates
         | 
| 803 | 
            +
             | 
| 804 | 
            +
            Typically, Kafka certificates come in the JKS format, which isn't supported by ruby-kafka. There's [a wiki page](https://github.com/zendesk/ruby-kafka/wiki/Creating-X509-certificates-from-JKS-format) that describes how to generate valid X509 certificates from JKS certificates.
         | 
| 805 | 
            +
             | 
| 806 | 
            +
            #### Authentication using SASL
         | 
| 807 | 
            +
             | 
| 808 | 
            +
            Kafka has support for using SASL to authenticate clients. Currently only the GSSAPI mechanism is supported by ruby-kafka.
         | 
| 809 | 
            +
             | 
| 810 | 
            +
            In order to authenticate using SASL, set your principal and optionally your keytab when initializing the Kafka client:
         | 
| 811 | 
            +
             | 
| 812 | 
            +
            ```ruby
         | 
| 813 | 
            +
            kafka = Kafka.new(
         | 
| 814 | 
            +
              sasl_gssapi_principal: 'kafka/kafka.example.com@EXAMPLE.COM',
         | 
| 815 | 
            +
              sasl_gssapi_keytab: '/etc/keytabs/kafka.keytab',
         | 
| 816 | 
            +
              # ...
         | 
| 817 | 
            +
            )
         | 
| 818 | 
            +
            ```
         | 
| 819 | 
            +
             | 
| 758 820 | 
             
            ## Design
         | 
| 759 821 |  | 
| 760 822 | 
             
            The library has been designed as a layered system, with each layer having a clear responsibility:
         | 
    
        data/lib/kafka.rb
    CHANGED
    
    | @@ -79,6 +79,7 @@ module Kafka | |
| 79 79 | 
             
              class NotCoordinatorForGroup < ProtocolError
         | 
| 80 80 | 
             
              end
         | 
| 81 81 |  | 
| 82 | 
            +
              # 17
         | 
| 82 83 | 
             
              # For a request which attempts to access an invalid topic (e.g. one which has
         | 
| 83 84 | 
             
              # an illegal name), or if an attempt is made to write to an internal topic
         | 
| 84 85 | 
             
              # (such as the consumer offsets topic).
         | 
| @@ -90,37 +91,105 @@ module Kafka | |
| 90 91 | 
             
              class RecordListTooLarge < ProtocolError
         | 
| 91 92 | 
             
              end
         | 
| 92 93 |  | 
| 94 | 
            +
              # 19
         | 
| 93 95 | 
             
              # Returned from a produce request when the number of in-sync replicas is
         | 
| 94 96 | 
             
              # lower than the configured minimum and requiredAcks is -1.
         | 
| 95 97 | 
             
              class NotEnoughReplicas < ProtocolError
         | 
| 96 98 | 
             
              end
         | 
| 97 99 |  | 
| 100 | 
            +
              # 20
         | 
| 98 101 | 
             
              # Returned from a produce request when the message was written to the log,
         | 
| 99 102 | 
             
              # but with fewer in-sync replicas than required.
         | 
| 100 103 | 
             
              class NotEnoughReplicasAfterAppend < ProtocolError
         | 
| 101 104 | 
             
              end
         | 
| 102 105 |  | 
| 106 | 
            +
              # 21
         | 
| 103 107 | 
             
              # Returned from a produce request if the requested requiredAcks is invalid
         | 
| 104 108 | 
             
              # (anything other than -1, 1, or 0).
         | 
| 105 109 | 
             
              class InvalidRequiredAcks < ProtocolError
         | 
| 106 110 | 
             
              end
         | 
| 107 111 |  | 
| 112 | 
            +
              # 9
         | 
| 108 113 | 
             
              # Raised if a replica is expected on a broker, but is not. Can be safely ignored.
         | 
| 109 114 | 
             
              class ReplicaNotAvailable < ProtocolError
         | 
| 110 115 | 
             
              end
         | 
| 111 116 |  | 
| 117 | 
            +
              # 25
         | 
| 112 118 | 
             
              class UnknownMemberId < ProtocolError
         | 
| 113 119 | 
             
              end
         | 
| 114 120 |  | 
| 121 | 
            +
              # 27
         | 
| 115 122 | 
             
              class RebalanceInProgress < ProtocolError
         | 
| 116 123 | 
             
              end
         | 
| 117 124 |  | 
| 125 | 
            +
              # 22
         | 
| 118 126 | 
             
              class IllegalGeneration < ProtocolError
         | 
| 119 127 | 
             
              end
         | 
| 120 128 |  | 
| 129 | 
            +
              # 26
         | 
| 121 130 | 
             
              class InvalidSessionTimeout < ProtocolError
         | 
| 122 131 | 
             
              end
         | 
| 123 132 |  | 
| 133 | 
            +
              # 28
         | 
| 134 | 
            +
              class InvalidCommitOffsetSize < ProtocolError
         | 
| 135 | 
            +
              end
         | 
| 136 | 
            +
             | 
| 137 | 
            +
              # 29
         | 
| 138 | 
            +
              class TopicAuthorizationCode < ProtocolError
         | 
| 139 | 
            +
              end
         | 
| 140 | 
            +
             | 
| 141 | 
            +
              # 30
         | 
| 142 | 
            +
              class GroupAuthorizationCode < ProtocolError
         | 
| 143 | 
            +
              end
         | 
| 144 | 
            +
             | 
| 145 | 
            +
              # 31
         | 
| 146 | 
            +
              class ClusterAuthorizationCode < ProtocolError
         | 
| 147 | 
            +
              end
         | 
| 148 | 
            +
             | 
| 149 | 
            +
              # 32
         | 
| 150 | 
            +
              class InvalidTimestamp < ProtocolError
         | 
| 151 | 
            +
              end
         | 
| 152 | 
            +
             | 
| 153 | 
            +
              # 33
         | 
| 154 | 
            +
              class UnsupportedSaslMechanism < ProtocolError
         | 
| 155 | 
            +
              end
         | 
| 156 | 
            +
             | 
| 157 | 
            +
              # 34
         | 
| 158 | 
            +
              class InvalidSaslState < ProtocolError
         | 
| 159 | 
            +
              end
         | 
| 160 | 
            +
             | 
| 161 | 
            +
              # 35
         | 
| 162 | 
            +
              class UnsupportedVersion < ProtocolError
         | 
| 163 | 
            +
              end
         | 
| 164 | 
            +
             | 
| 165 | 
            +
              # 36
         | 
| 166 | 
            +
              class TopicAlreadyExists < Error
         | 
| 167 | 
            +
              end
         | 
| 168 | 
            +
             | 
| 169 | 
            +
              # 37
         | 
| 170 | 
            +
              class InvalidPartitions < Error
         | 
| 171 | 
            +
              end
         | 
| 172 | 
            +
             | 
| 173 | 
            +
              # 38
         | 
| 174 | 
            +
              class InvalidReplicationFactor < Error
         | 
| 175 | 
            +
              end
         | 
| 176 | 
            +
             | 
| 177 | 
            +
              # 39
         | 
| 178 | 
            +
              class InvalidReplicaAssignment < Error
         | 
| 179 | 
            +
              end
         | 
| 180 | 
            +
             | 
| 181 | 
            +
              # 40
         | 
| 182 | 
            +
              class InvalidConfig < Error
         | 
| 183 | 
            +
              end
         | 
| 184 | 
            +
             | 
| 185 | 
            +
              # 41
         | 
| 186 | 
            +
              class NotController < Error
         | 
| 187 | 
            +
              end
         | 
| 188 | 
            +
             | 
| 189 | 
            +
              # 42
         | 
| 190 | 
            +
              class InvalidRequest < Error
         | 
| 191 | 
            +
              end
         | 
| 192 | 
            +
             | 
| 124 193 | 
             
              # Raised when there's a network connection error.
         | 
| 125 194 | 
             
              class ConnectionError < Error
         | 
| 126 195 | 
             
              end
         | 
| @@ -142,6 +211,9 @@ module Kafka | |
| 142 211 | 
             
              class FetchError < Error
         | 
| 143 212 | 
             
              end
         | 
| 144 213 |  | 
| 214 | 
            +
              class NoPartitionsAssignedError < Error
         | 
| 215 | 
            +
              end
         | 
| 216 | 
            +
             | 
| 145 217 | 
             
              # Initializes a new Kafka client.
         | 
| 146 218 | 
             
              #
         | 
| 147 219 | 
             
              # @see Client#initialize
         | 
    
        data/lib/kafka/broker.rb
    CHANGED
    
    | @@ -40,6 +40,22 @@ module Kafka | |
| 40 40 | 
             
                  @connection.send_request(request)
         | 
| 41 41 | 
             
                end
         | 
| 42 42 |  | 
| 43 | 
            +
                # Fetches messages asynchronously.
         | 
| 44 | 
            +
                #
         | 
| 45 | 
            +
                # The fetch request is sent to the broker, but the response is not read.
         | 
| 46 | 
            +
                # This allows the broker to process the request, wait for new messages,
         | 
| 47 | 
            +
                # and send a response without the client having to wait. In order to
         | 
| 48 | 
            +
                # read the response, call `#call` on the returned object. This will
         | 
| 49 | 
            +
                # block the caller until the response is available.
         | 
| 50 | 
            +
                #
         | 
| 51 | 
            +
                # @param (see Kafka::Protocol::FetchRequest#initialize)
         | 
| 52 | 
            +
                # @return [Kafka::AsyncResponse]
         | 
| 53 | 
            +
                def fetch_messages_async(**options)
         | 
| 54 | 
            +
                  request = Protocol::FetchRequest.new(**options)
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                  @connection.send_async_request(request)
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 43 59 | 
             
                # Lists the offset of the specified topics and partitions.
         | 
| 44 60 | 
             
                #
         | 
| 45 61 | 
             
                # @param (see Kafka::Protocol::ListOffsetRequest#initialize)
         | 
| @@ -101,5 +117,11 @@ module Kafka | |
| 101 117 |  | 
| 102 118 | 
             
                  @connection.send_request(request)
         | 
| 103 119 | 
             
                end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                # Sends a SASL handshake request to the broker over this broker's
                # connection.
                #
                # All keyword arguments are forwarded unchanged to
                # Protocol::SaslHandshakeRequest.new.
                #
                # @param options [Hash] keyword arguments for the handshake request
                #   (see Kafka::Protocol::SaslHandshakeRequest#initialize).
                # @return the value returned by `@connection.send_request` for the
                #   request (presumably the decoded handshake response -- confirm
                #   against Connection#send_request).
                def sasl_handshake(**options)
                  # The request class must be instantiated with `.new`; writing
                  # `Protocol::SaslHandshakeRequest(...)` is a method call on the
                  # Protocol module and raises NoMethodError.
                  request = Protocol::SaslHandshakeRequest.new(**options)

                  @connection.send_request(request)
                end
         | 
| 104 126 | 
             
              end
         | 
| 105 127 | 
             
            end
         | 
    
        data/lib/kafka/client.rb
    CHANGED
    
    | @@ -40,8 +40,14 @@ module Kafka | |
| 40 40 | 
             
                # @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
         | 
| 41 41 | 
             
                #   SSL connection. Must be used in combination with ssl_client_cert.
         | 
| 42 42 | 
             
                #
         | 
| 43 | 
            +
                # @param sasl_gssapi_principal [String, nil] a KRB5 principal
         | 
| 44 | 
            +
                #
         | 
| 45 | 
            +
                # @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
         | 
| 46 | 
            +
                #
         | 
| 43 47 | 
             
                # @return [Client]
         | 
| 44 | 
            -
                def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, | 
| 48 | 
            +
                def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
         | 
| 49 | 
            +
                               ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
         | 
| 50 | 
            +
                               sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil)
         | 
| 45 51 | 
             
                  @logger = logger || Logger.new(nil)
         | 
| 46 52 | 
             
                  @instrumenter = Instrumenter.new(client_id: client_id)
         | 
| 47 53 | 
             
                  @seed_brokers = normalize_seed_brokers(seed_brokers)
         | 
| @@ -55,6 +61,8 @@ module Kafka | |
| 55 61 | 
             
                    ssl_context: ssl_context,
         | 
| 56 62 | 
             
                    logger: @logger,
         | 
| 57 63 | 
             
                    instrumenter: @instrumenter,
         | 
| 64 | 
            +
                    sasl_gssapi_principal: sasl_gssapi_principal,
         | 
| 65 | 
            +
                    sasl_gssapi_keytab: sasl_gssapi_keytab
         | 
| 58 66 | 
             
                  )
         | 
| 59 67 |  | 
| 60 68 | 
             
                  @cluster = initialize_cluster
         | 
| @@ -414,6 +422,26 @@ module Kafka | |
| 414 422 | 
             
                  @cluster.resolve_offset(topic, partition, :latest) - 1
         | 
| 415 423 | 
             
                end
         | 
| 416 424 |  | 
| 425 | 
            +
             | 
| 426 | 
            +
                # Retrieve the offset of the last message in each partition of the specified topics.
         | 
| 427 | 
            +
                #
         | 
| 428 | 
            +
                # @param topics [Array<String>] topic names.
         | 
| 429 | 
            +
                # @return [Hash<String, Hash<Integer, Integer>>]
         | 
| 430 | 
            +
                # @example
         | 
| 431 | 
            +
                #   last_offsets_for('topic-1', 'topic-2') # =>
         | 
| 432 | 
            +
                #   # {
         | 
| 433 | 
            +
                #   #   'topic-1' => { 0 => 100, 1 => 100 },
         | 
| 434 | 
            +
                #   #   'topic-2' => { 0 => 100, 1 => 100 }
         | 
| 435 | 
            +
                #   # }
         | 
| 436 | 
            +
                def last_offsets_for(*topics)
         | 
| 437 | 
            +
                  @cluster.add_target_topics(topics)
         | 
| 438 | 
            +
                  topics.map {|topic|
         | 
| 439 | 
            +
                    partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
         | 
| 440 | 
            +
                    partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
         | 
| 441 | 
            +
                    [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
         | 
| 442 | 
            +
                  }.to_h
         | 
| 443 | 
            +
                end
         | 
| 444 | 
            +
             | 
| 417 445 | 
             
                # Closes all connections to the Kafka brokers and frees up used resources.
         | 
| 418 446 | 
             
                #
         | 
| 419 447 | 
             
                # @return [nil]
         | 
    
        data/lib/kafka/cluster.rb
    CHANGED
    
    | @@ -157,6 +157,9 @@ module Kafka | |
| 157 157 | 
             
                  end
         | 
| 158 158 |  | 
| 159 159 | 
             
                  offsets
         | 
| 160 | 
            +
                rescue Kafka::ProtocolError
         | 
| 161 | 
            +
                  mark_as_stale!
         | 
| 162 | 
            +
                  raise
         | 
| 160 163 | 
             
                end
         | 
| 161 164 |  | 
| 162 165 | 
             
                def resolve_offset(topic, partition, offset)
         | 
| @@ -190,7 +193,7 @@ module Kafka | |
| 190 193 | 
             
                # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
         | 
| 191 194 | 
             
                # @return [Protocol::MetadataResponse] the cluster metadata.
         | 
| 192 195 | 
             
                def fetch_cluster_info
         | 
| 193 | 
            -
                  @seed_brokers.each do |node|
         | 
| 196 | 
            +
                  @seed_brokers.shuffle.each do |node|
         | 
| 194 197 | 
             
                    @logger.info "Fetching cluster metadata from #{node}"
         | 
| 195 198 |  | 
| 196 199 | 
             
                    begin
         | 
    
        data/lib/kafka/connection.rb
    CHANGED
    
    | @@ -2,11 +2,41 @@ require "stringio" | |
| 2 2 | 
             
            require "kafka/socket_with_timeout"
         | 
| 3 3 | 
             
            require "kafka/ssl_socket_with_timeout"
         | 
| 4 4 | 
             
            require "kafka/protocol/request_message"
         | 
| 5 | 
            +
            require "kafka/protocol/null_response"
         | 
| 5 6 | 
             
            require "kafka/protocol/encoder"
         | 
| 6 7 | 
             
            require "kafka/protocol/decoder"
         | 
| 7 8 |  | 
| 8 9 | 
             
            module Kafka
         | 
| 9 10 |  | 
| 11 | 
            +
              # An asynchronous response object allows us to deliver a response at some
         | 
| 12 | 
            +
              # later point in time.
         | 
| 13 | 
            +
              #
         | 
| 14 | 
            +
              # When instantiating an AsyncResponse, you provide a response decoder and
         | 
| 15 | 
            +
              # a block that will force the caller to wait until a response is available.
         | 
| 16 | 
            +
              class AsyncResponse
         | 
| 17 | 
            +
                # Use a custom "nil" value so that nil can be an actual value.
         | 
| 18 | 
            +
                MISSING = Object.new
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                def initialize(decoder, &block)
         | 
| 21 | 
            +
                  @decoder = decoder
         | 
| 22 | 
            +
                  @block = block
         | 
| 23 | 
            +
                  @response = MISSING
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                # Block until a response is available.
         | 
| 27 | 
            +
                def call
         | 
| 28 | 
            +
                  @block.call if @response == MISSING
         | 
| 29 | 
            +
                  @response
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                # Deliver the response data.
         | 
| 33 | 
            +
                #
         | 
| 34 | 
            +
                # After calling this, `#call` will return the decoded response.
         | 
| 35 | 
            +
                def deliver(data)
         | 
| 36 | 
            +
                  @response = @decoder.decode(data)
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
              end
         | 
| 39 | 
            +
             | 
| 10 40 | 
             
              # A connection to a single Kafka broker.
         | 
| 11 41 | 
             
              #
         | 
| 12 42 | 
             
              # Usually you'll need a separate connection to each broker in a cluster, since most
         | 
| @@ -28,6 +58,9 @@ module Kafka | |
| 28 58 | 
             
                SOCKET_TIMEOUT = 10
         | 
| 29 59 | 
             
                CONNECT_TIMEOUT = 10
         | 
| 30 60 |  | 
| 61 | 
            +
                attr_reader :encoder
         | 
| 62 | 
            +
                attr_reader :decoder
         | 
| 63 | 
            +
             | 
| 31 64 | 
             
                # Opens a connection to a Kafka broker.
         | 
| 32 65 | 
             
                #
         | 
| 33 66 | 
             
                # @param host [String] the hostname of the broker.
         | 
| @@ -75,6 +108,18 @@ module Kafka | |
| 75 108 | 
             
                #
         | 
| 76 109 | 
             
                # @return [Object] the response.
         | 
| 77 110 | 
             
                def send_request(request)
         | 
| 111 | 
            +
                  # Immediately block on the asynchronous request.
         | 
| 112 | 
            +
                  send_async_request(request).call
         | 
| 113 | 
            +
                end
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                # Sends a request over the connection.
         | 
| 116 | 
            +
                #
         | 
| 117 | 
            +
                # @param request [#encode, #response_class] the request that should be
         | 
| 118 | 
            +
                #   encoded and written.
         | 
| 119 | 
            +
                #
         | 
| 120 | 
            +
                # @return [AsyncResponse] the async response, allowing the caller to choose
         | 
| 121 | 
            +
                #   when to block.
         | 
| 122 | 
            +
                def send_async_request(request)
         | 
| 78 123 | 
             
                  # Default notification payload.
         | 
| 79 124 | 
             
                  notification = {
         | 
| 80 125 | 
             
                    broker_host: @host,
         | 
| @@ -83,15 +128,41 @@ module Kafka | |
| 83 128 | 
             
                    response_size: 0,
         | 
| 84 129 | 
             
                  }
         | 
| 85 130 |  | 
| 86 | 
            -
                  @instrumenter. | 
| 87 | 
            -
             | 
| 131 | 
            +
                  @instrumenter.start("request.connection", notification)
         | 
| 132 | 
            +
             | 
| 133 | 
            +
                  open unless open?
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                  @correlation_id += 1
         | 
| 88 136 |  | 
| 89 | 
            -
             | 
| 137 | 
            +
                  write_request(request, notification)
         | 
| 90 138 |  | 
| 91 | 
            -
             | 
| 139 | 
            +
                  response_class = request.response_class
         | 
| 140 | 
            +
                  correlation_id = @correlation_id
         | 
| 92 141 |  | 
| 93 | 
            -
             | 
| 94 | 
            -
                     | 
| 142 | 
            +
                  if response_class.nil?
         | 
| 143 | 
            +
                    async_response = AsyncResponse.new(Protocol::NullResponse) { nil }
         | 
| 144 | 
            +
             | 
| 145 | 
            +
                    # Immediately deliver a nil value.
         | 
| 146 | 
            +
                    async_response.deliver(nil)
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                    @instrumenter.finish("request.connection", notification)
         | 
| 149 | 
            +
             | 
| 150 | 
            +
                    async_response
         | 
| 151 | 
            +
                  else
         | 
| 152 | 
            +
                    async_response = AsyncResponse.new(response_class) {
         | 
| 153 | 
            +
                      # A caller is trying to read the response, so we have to wait for it
         | 
| 154 | 
            +
                      # before we can return.
         | 
| 155 | 
            +
                      wait_for_response(correlation_id, notification)
         | 
| 156 | 
            +
             | 
| 157 | 
            +
                      # Once done, we can finish the instrumentation.
         | 
| 158 | 
            +
                      @instrumenter.finish("request.connection", notification)
         | 
| 159 | 
            +
                    }
         | 
| 160 | 
            +
             | 
| 161 | 
            +
                    # Store the asynchronous response so that data can be delivered to it
         | 
| 162 | 
            +
                    # at a later time.
         | 
| 163 | 
            +
                    @pending_async_responses[correlation_id] = async_response
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                    async_response
         | 
| 95 166 | 
             
                  end
         | 
| 96 167 | 
             
                rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
         | 
| 97 168 | 
             
                  close
         | 
| @@ -115,6 +186,9 @@ module Kafka | |
| 115 186 |  | 
| 116 187 | 
             
                  # Correlation id is initialized to zero and bumped for each request.
         | 
| 117 188 | 
             
                  @correlation_id = 0
         | 
| 189 | 
            +
             | 
| 190 | 
            +
                  # The pipeline of pending response futures must be reset.
         | 
| 191 | 
            +
                  @pending_async_responses = {}
         | 
| 118 192 | 
             
                rescue Errno::ETIMEDOUT => e
         | 
| 119 193 | 
             
                  @logger.error "Timed out while trying to connect to #{self}: #{e}"
         | 
| 120 194 | 
             
                  raise ConnectionError, e
         | 
| @@ -156,8 +230,8 @@ module Kafka | |
| 156 230 | 
             
                #   a given Decoder.
         | 
| 157 231 | 
             
                #
         | 
| 158 232 | 
             
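                # @return [Array(Integer, Kafka::Protocol::Decoder)] the correlation id and the decoder for the rest of the response.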
         | 
| 159 | 
            -
                def read_response( | 
| 160 | 
            -
                  @logger.debug "Waiting for response #{ | 
| 233 | 
            +
                def read_response(expected_correlation_id, notification)
         | 
| 234 | 
            +
                  @logger.debug "Waiting for response #{expected_correlation_id} from #{to_s}"
         | 
| 161 235 |  | 
| 162 236 | 
             
                  data = @decoder.bytes
         | 
| 163 237 | 
             
                  notification[:response_size] = data.bytesize
         | 
| @@ -166,32 +240,49 @@ module Kafka | |
| 166 240 | 
             
                  response_decoder = Kafka::Protocol::Decoder.new(buffer)
         | 
| 167 241 |  | 
| 168 242 | 
             
                  correlation_id = response_decoder.int32
         | 
| 169 | 
            -
                  response = response_class.decode(response_decoder)
         | 
| 170 243 |  | 
| 171 244 | 
             
                  @logger.debug "Received response #{correlation_id} from #{to_s}"
         | 
| 172 245 |  | 
| 173 | 
            -
                  return correlation_id,  | 
| 246 | 
            +
                  return correlation_id, response_decoder
         | 
| 174 247 | 
             
                rescue Errno::ETIMEDOUT
         | 
| 175 | 
            -
                  @logger.error "Timed out while waiting for response #{ | 
| 248 | 
            +
                  @logger.error "Timed out while waiting for response #{expected_correlation_id}"
         | 
| 176 249 | 
             
                  raise
         | 
| 250 | 
            +
                rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
         | 
| 251 | 
            +
                  close
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                  raise ConnectionError, "Connection error: #{e}"
         | 
| 177 254 | 
             
                end
         | 
| 178 255 |  | 
| 179 | 
            -
                def wait_for_response( | 
| 256 | 
            +
                def wait_for_response(expected_correlation_id, notification)
         | 
| 180 257 | 
             
                  loop do
         | 
| 181 | 
            -
                    correlation_id,  | 
| 182 | 
            -
             | 
| 183 | 
            -
                     | 
| 184 | 
            -
             | 
| 185 | 
            -
             | 
| 186 | 
            -
             | 
| 187 | 
            -
             | 
| 188 | 
            -
                       | 
| 189 | 
            -
             | 
| 190 | 
            -
                       | 
| 258 | 
            +
                    correlation_id, data = read_response(expected_correlation_id, notification)
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                    if correlation_id < expected_correlation_id
         | 
| 261 | 
            +
                      # There may have been a previous request that timed out before the client
         | 
| 262 | 
            +
                      # was able to read the response. In that case, the response will still be
         | 
| 263 | 
            +
                      # sitting in the socket waiting to be read. If the response we just read
         | 
| 264 | 
            +
                      # was to a previous request, we deliver it to the pending async response
         | 
| 265 | 
            +
                      # future.
         | 
| 266 | 
            +
                      async_response = @pending_async_responses.delete(correlation_id)
         | 
| 267 | 
            +
                      async_response.deliver(data) if async_response
         | 
| 268 | 
            +
                    elsif correlation_id > expected_correlation_id
         | 
| 269 | 
            +
                      raise Kafka::Error, "Correlation id mismatch: expected #{expected_correlation_id} but got #{correlation_id}"
         | 
| 191 270 | 
             
                    else
         | 
| 192 | 
            -
                       | 
| 271 | 
            +
                      # If the request was asynchronous, deliver the response to the pending
         | 
| 272 | 
            +
                      # async response future.
         | 
| 273 | 
            +
                      async_response = @pending_async_responses.delete(correlation_id)
         | 
| 274 | 
            +
                      async_response.deliver(data)
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                      return async_response.call
         | 
| 193 277 | 
             
                    end
         | 
| 194 278 | 
             
                  end
         | 
| 279 | 
            +
                rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
         | 
| 280 | 
            +
                  notification[:exception] = [e.class.name, e.message]
         | 
| 281 | 
            +
                  notification[:exception_object] = e
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                  close
         | 
| 284 | 
            +
             | 
| 285 | 
            +
                  raise ConnectionError, "Connection error: #{e}"
         | 
| 195 286 | 
             
                end
         | 
| 196 287 | 
             
              end
         | 
| 197 288 | 
             
            end
         |