ruby-kafka 0.1.0.pre.alpha2 → 0.1.0.pre.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 945e87a87fcfebd2808de4203027613846a2f7ad
-  data.tar.gz: f43c576a56aea2f49ef1047aec56f376ec2be0b0
+  metadata.gz: eb0de1c04f8551ffe3750b3eac702b51777e525d
+  data.tar.gz: 1e6ed1b055a754d7eb958a03314f50f62138de59
 SHA512:
-  metadata.gz: c51ccf72b3822a773d013c68fe77665d7d5e9b4021131cb97d8bed65e47c681be18d89f718b63840ef5c463991b0c56c042b3ab747753c23b7f88c0b6131d8d3
-  data.tar.gz: 4c5c41e3858562fcdac0540652f5d3d8f21334a841b717f7f6281f1ff15a58c3ba165f518fbce9778669d5d35279e5242bb3ca3d1afce62a5c40c01b790e1720
+  metadata.gz: 5ac63915c1bead98581a2b4ddd577887ec5fa7f8d7c14ebcedac3d77e06d24a718a1df3b88961edd13927acdb16e4992acdaaecd2e05dc7fe1e20c511a8ad6c1
+  data.tar.gz: 2273d4da86a6ee82c5c3cc310e1549056638e1c07839abcbbb511f9dc8cb055ed8680e4ee9b5415bbb00b21155c4955f66d7be385423e639c5c27e0e2b38f8c8
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
-source 'https://rubygems.org'
+source "https://rubygems.org"
+ruby "2.2.3"
 
 # Specify your gem's dependencies in kafka.gemspec
 gemspec
 
 gem "dotenv"
+gem "docker-api"
data/lib/kafka.rb CHANGED
@@ -13,6 +13,9 @@ module Kafka
   NotLeaderForPartition = Class.new(Error)
   RequestTimedOut = Class.new(Error)
 
+  # Raised if a replica is expected on a broker, but is not. Can be safely ignored.
+  ReplicaNotAvailable = Class.new(Error)
+
   def self.new(**options)
     Client.new(**options)
   end
data/lib/kafka/broker.rb CHANGED
@@ -4,21 +4,23 @@ require "kafka/protocol"
 
 module Kafka
   class Broker
-    def initialize(host:, port:, node_id: nil, client_id:, logger:)
-      @host, @port, @node_id = host, port, node_id
-
-      @connection = Connection.new(
-        host: host,
-        port: port,
-        client_id: client_id,
-        logger: logger
-      )
+    def self.connect(node_id: nil, logger:, **options)
+      connection = Connection.new(logger: logger, **options)
+      new(connection: connection, node_id: node_id, logger: logger)
+    end
 
+    def initialize(connection:, node_id: nil, logger:)
+      @connection = connection
+      @node_id = node_id
       @logger = logger
     end
 
     def to_s
-      "#{@host}:#{@port} (node_id=#{@node_id.inspect})"
+      "#{@connection} (node_id=#{@node_id.inspect})"
+    end
+
+    def disconnect
+      @connection.close
     end
 
     def fetch_metadata(**options)
@@ -32,7 +34,12 @@ module Kafka
       Protocol.handle_error(topic.topic_error_code)
 
       topic.partitions.each do |partition|
-        Protocol.handle_error(partition.partition_error_code)
+        begin
+          Protocol.handle_error(partition.partition_error_code)
+        rescue ReplicaNotAvailable
+          # This error can be safely ignored per the protocol specification.
+          @logger.warn "Replica not available for topic #{topic.topic_name}, partition #{partition.partition_id}"
+        end
       end
     end
 
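For orientation: the net effect of the Broker changes is that construction and connection are now separate concerns. `Broker.connect` builds the `Connection` and hands it to `Broker.new`, and the new `#disconnect` method lets callers release the socket. A minimal sketch of the resulting API — the host, port, and logger are illustrative placeholders, not values taken from this diff:

    require "kafka/broker"
    require "logger"

    logger = Logger.new($stderr)

    # "localhost:9092" is an illustrative address; any reachable broker works.
    broker = Kafka::Broker.connect(
      host: "localhost",
      port: 9092,
      client_id: "example-client",
      logger: logger,
    )

    cluster_info = broker.fetch_metadata

    # The broker now owns its connection, so it can be closed explicitly.
    broker.disconnect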
data/lib/kafka/broker_pool.rb CHANGED
@@ -8,6 +8,12 @@ module Kafka
   # partitions to the current leader for those partitions.
   class BrokerPool
 
+    # The number of times to try to connect to a broker before giving up.
+    MAX_CONNECTION_ATTEMPTS = 3
+
+    # The backoff period between connection retries, in seconds.
+    RETRY_BACKOFF_TIMEOUT = 5
+
     # Initializes a broker pool with a set of seed brokers.
     #
     # The pool will try to fetch cluster metadata from one of the brokers.
@@ -15,23 +21,99 @@ module Kafka
     # @param seed_brokers [Array<String>]
     # @param client_id [String]
     # @param logger [Logger]
-    def initialize(seed_brokers:, client_id:, logger:)
+    def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
       @client_id = client_id
       @logger = logger
+      @socket_timeout = socket_timeout
       @brokers = {}
+      @seed_brokers = seed_brokers
+
+      refresh
+    end
+
+    # Refreshes the cluster metadata.
+    #
+    # This is used to update the partition leadership information, among other things.
+    # The method will go through each node listed in `seed_brokers`, connecting to the
+    # first one that is available. This node will be queried for the cluster metadata.
+    #
+    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
+    # @return [nil]
+    def refresh
+      @seed_brokers.each do |node|
+        @logger.info "Trying to initialize broker pool from node #{node}"
+
+        begin
+          host, port = node.split(":", 2)
+
+          broker = Broker.connect(
+            host: host,
+            port: port.to_i,
+            client_id: @client_id,
+            socket_timeout: @socket_timeout,
+            logger: @logger,
+          )
+
+          @cluster_info = broker.fetch_metadata
+
+          @logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
 
-      initialize_from_seed_brokers(seed_brokers)
+          return
+        rescue Error => e
+          @logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
+        end
+      end
+
+      raise ConnectionError, "Could not connect to any of the seed brokers: #{@seed_brokers.inspect}"
     end
 
-    # Gets the leader of the given topic and partition.
+    # Finds the broker acting as the leader of the given topic and partition and connects to it.
+    #
+    # Note that this call may take a considerable amount of time, since the cached cluster
+    # metadata may be out of date. In that case, the cluster needs to be re-discovered. This
+    # can happen when a broker becomes unavailable, which would trigger a leader election for
+    # the partitions previously owned by that broker. Since this can take some time, this method
+    # will retry up to `MAX_CONNECTION_ATTEMPTS` times, waiting `RETRY_BACKOFF_TIMEOUT` seconds
+    # between each attempt.
     #
     # @param topic [String]
     # @param partition [Integer]
+    # @raise [ConnectionError] if it was not possible to connect to the leader.
     # @return [Broker] the broker that's currently acting as leader of the partition.
     def get_leader(topic, partition)
-      leader_id = @cluster_info.find_leader_id(topic, partition)
+      attempt = 0
+
+      begin
+        leader_id = @cluster_info.find_leader_id(topic, partition)
+        broker_for_id(leader_id)
+      rescue ConnectionError => e
+        @logger.error "Failed to connect to leader for topic `#{topic}`, partition #{partition}"
+
+        if attempt < MAX_CONNECTION_ATTEMPTS
+          attempt += 1
+
+          @logger.info "Rediscovering cluster and retrying"
 
-      broker_for_id(leader_id)
+          sleep RETRY_BACKOFF_TIMEOUT
+          refresh
+          retry
+        else
+          @logger.error "Giving up trying to find leader for topic `#{topic}`, partition #{partition}"
+
+          raise e
+        end
+      end
+    end
+
+    def partitions_for(topic)
+      @cluster_info.partitions_for(topic)
+    end
+
+    def shutdown
+      @brokers.each do |id, broker|
+        @logger.info "Disconnecting broker #{id}"
+        broker.disconnect
+      end
     end
 
     private
@@ -43,35 +125,14 @@ module Kafka
     def connect_to_broker(broker_id)
       broker_info = @cluster_info.find_broker(broker_id)
 
-      Broker.new(
+      Broker.connect(
         host: broker_info.host,
         port: broker_info.port,
         node_id: broker_info.node_id,
         client_id: @client_id,
+        socket_timeout: @socket_timeout,
         logger: @logger,
       )
     end
-
-    def initialize_from_seed_brokers(seed_brokers)
-      seed_brokers.each do |node|
-        @logger.info "Trying to initialize broker pool from node #{node}"
-
-        begin
-          host, port = node.split(":", 2)
-
-          broker = Broker.new(host: host, port: port, client_id: @client_id, logger: @logger)
-
-          @cluster_info = broker.fetch_metadata
-
-          @logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
-
-          return
-        rescue Error => e
-          @logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
-        end
-      end
-
-      raise ConnectionError, "Could not connect to any of the seed brokers: #{seed_brokers.inspect}"
-    end
   end
 end
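Taken together, the pool now bounds how long a stale leader can stall a caller: `get_leader` sleeps `RETRY_BACKOFF_TIMEOUT` (5 s) between at most `MAX_CONNECTION_ATTEMPTS` (3) rediscovery attempts, so the retry loop alone adds at most 15 seconds before the `ConnectionError` is re-raised. A hedged sketch of driving the pool directly — broker addresses and the topic name are placeholders:

    require "kafka/broker_pool"
    require "logger"

    # The constructor now connects eagerly via #refresh and raises
    # ConnectionError if no seed broker can be reached.
    pool = Kafka::BrokerPool.new(
      seed_brokers: ["kafka1:9092", "kafka2:9092"],
      client_id: "example-client",
      logger: Logger.new($stderr),
      socket_timeout: 10,
    )

    # Rediscovers the cluster and retries if the cached leader is stale.
    leader = pool.get_leader("greetings", 0)

    # Closes every cached broker connection.
    pool.shutdown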
data/lib/kafka/client.rb CHANGED
@@ -3,17 +3,19 @@ require "kafka/producer"
 
 module Kafka
   class Client
-    def initialize(seed_brokers:, client_id:, logger:)
+    def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
       @seed_brokers = seed_brokers
       @client_id = client_id
       @logger = logger
+      @socket_timeout = socket_timeout
     end
 
     def get_producer(**options)
       broker_pool = BrokerPool.new(
         seed_brokers: @seed_brokers,
         client_id: @client_id,
-        logger: @logger
+        logger: @logger,
+        socket_timeout: @socket_timeout,
       )
 
       Producer.new(broker_pool: broker_pool, logger: @logger, **options)
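Because `Kafka.new(**options)` simply forwards to `Client.new` (see the lib/kafka.rb hunk above), the new `socket_timeout` can be set once at the top level and is threaded down through `BrokerPool` to every `Connection`. A sketch, with placeholder values:

    require "kafka"
    require "logger"

    kafka = Kafka.new(
      seed_brokers: ["kafka1:9092"], # illustrative address
      client_id: "example-client",
      logger: Logger.new($stderr),
      socket_timeout: 10, # seconds, applied to every broker connection
    )

    producer = kafka.get_producer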
data/lib/kafka/connection.rb CHANGED
@@ -11,7 +11,8 @@ module Kafka
   # requests must be directed specifically to the broker that is currently leader for
   # the set of topic partitions you want to produce to or consume from.
   class Connection
-    API_VERSION = 0
+    SOCKET_TIMEOUT = 5
+    CONNECT_TIMEOUT = 10
 
     # Opens a connection to a Kafka broker.
     #
@@ -21,24 +22,33 @@ module Kafka
     # request to help trace calls and should logically identify the application
     # making the request.
     # @param logger [Logger] the logger used to log trace messages.
+    # @param connect_timeout [Integer] the socket timeout for connecting to the broker.
+    #   Default is 10 seconds.
+    # @param socket_timeout [Integer] the socket timeout for reading and writing to the
+    #   broker. Default is 5 seconds.
     #
     # @return [Connection] a new connection.
-    def initialize(host:, port:, client_id:, logger:)
+    def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil)
       @host, @port, @client_id = host, port, client_id
       @logger = logger
 
+      @connect_timeout = connect_timeout || CONNECT_TIMEOUT
+      @socket_timeout = socket_timeout || SOCKET_TIMEOUT
+
       @logger.info "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
 
-      @socket = TCPSocket.new(host, port)
+      @socket = Socket.tcp(host, port, connect_timeout: @connect_timeout)
 
       @encoder = Kafka::Protocol::Encoder.new(@socket)
       @decoder = Kafka::Protocol::Decoder.new(@socket)
 
       # Correlation id is initialized to zero and bumped for each request.
       @correlation_id = 0
-    rescue SocketError => e
+    rescue Errno::ETIMEDOUT => e
+      @logger.error "Timed out while trying to connect to #{host}:#{port}: #{e}"
+      raise ConnectionError, e
+    rescue SocketError, Errno::ECONNREFUSED => e
       @logger.error "Failed to connect to #{host}:#{port}: #{e}"
-
       raise ConnectionError, e
     end
 
@@ -46,6 +56,11 @@ module Kafka
       "#{@host}:#{@port}"
     end
 
+    def close
+      @logger.debug "Closing socket to #{to_s}"
+      @socket.close
+    end
+
     # Sends a request over the connection.
     #
     # @param api_key [Integer] the integer code for the API that is invoked.
@@ -75,13 +90,18 @@ module Kafka
 
       message = Kafka::Protocol::RequestMessage.new(
         api_key: api_key,
-        api_version: API_VERSION,
         correlation_id: @correlation_id,
         client_id: @client_id,
         request: request,
       )
 
       data = Kafka::Protocol::Encoder.encode_with(message)
+
+      unless IO.select(nil, [@socket], nil, @socket_timeout)
+        @logger.error "Timed out while writing request #{@correlation_id}"
+        raise ConnectionError
+      end
+
       @encoder.write_bytes(data)
 
       nil
@@ -96,6 +116,11 @@ module Kafka
     def read_response(response_class)
       @logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
 
+      unless IO.select([@socket], nil, nil, @socket_timeout)
+        @logger.error "Timed out while waiting for response #{@correlation_id}"
+        raise ConnectionError
+      end
+
       bytes = @decoder.bytes
 
       buffer = StringIO.new(bytes)
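The timeout mechanism in both the request and response paths is the classic `IO.select` guard: rather than setting socket options, each write and read first waits for the socket to become ready, and `IO.select` returning `nil` signals that the deadline passed. A standalone sketch of the same pattern — the method name and buffer size are illustrative, not part of this diff:

    require "socket"

    # Reads from the socket, raising if no data arrives within `timeout` seconds.
    def read_with_timeout(socket, timeout)
      # IO.select returns nil when the timeout elapses with nothing readable.
      unless IO.select([socket], nil, nil, timeout)
        raise "read timed out after #{timeout} seconds"
      end

      socket.readpartial(4096)
    end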
data/lib/kafka/partitioner.rb ADDED
@@ -0,0 +1,13 @@
+require "zlib"
+
+module Kafka
+  class Partitioner
+    def initialize(partitions)
+      @partitions = partitions
+    end
+
+    def partition_for_key(key)
+      Zlib.crc32(key) % @partitions.count
+    end
+  end
+end
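The new partitioner is a pure function of the key: it CRC32-hashes the key and takes the result modulo the partition count, so a given key maps to the same partition on every call, as long as the number of partitions stays fixed. A small illustration — the key and partition list are made up:

    require "kafka/partitioner"

    # Only the count of this collection matters to the partitioner.
    partitioner = Kafka::Partitioner.new((0...8).to_a)

    partitioner.partition_for_key("user-42") # deterministic value in 0..7
    partitioner.partition_for_key("user-42") # same value again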
data/lib/kafka/producer.rb CHANGED
@@ -1,13 +1,14 @@
 require "kafka/message"
 require "kafka/message_set"
+require "kafka/partitioner"
 
 module Kafka
   class Producer
-    # @param timeout [Integer] The number of milliseconds to wait for an
+    # @param timeout [Integer] The number of seconds to wait for an
     #   acknowledgement from the broker before timing out.
     # @param required_acks [Integer] The number of replicas that must acknowledge
     #   a write.
-    def initialize(broker_pool:, logger:, timeout: 10_000, required_acks: 1)
+    def initialize(broker_pool:, logger:, timeout: 10, required_acks: 1)
       @broker_pool = broker_pool
       @logger = logger
       @required_acks = required_acks
@@ -15,10 +16,50 @@ module Kafka
       @buffered_messages = []
     end
 
-    def write(value, key:, topic:, partition:)
-      @buffered_messages << Message.new(value, key: key, topic: topic, partition: partition)
+    # Writes a message to the specified topic. Note that messages are buffered in
+    # the producer until {#flush} is called.
+    #
+    # == Partitioning
+    #
+    # There are several options for specifying the partition that the message should
+    # be written to. The simplest option is to not specify a partition or partition
+    # key, in which case the message key will be used to select one of the available
+    # partitions. You can also specify the `partition` parameter yourself. This
+    # requires you to know which partitions are available, however. Oftentimes the
+    # best option is to specify the `partition_key` parameter: messages with the
+    # same partition key will always be assigned to the same partition, as long as
+    # the number of partitions doesn't change.
+    #
+    # @param value [String] the message data.
+    # @param key [String] the message key.
+    # @param topic [String] the topic that the message should be written to.
+    # @param partition [Integer] the partition that the message should be written to.
+    # @param partition_key [String] the key that should be used to assign a partition.
+    #
+    # @return [Message] the message that was written.
+    def write(value, key:, topic:, partition: nil, partition_key: nil)
+      if partition.nil?
+        # If no explicit partition key is specified we use the message key instead.
+        partition_key ||= key
+        partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
+        partition = partitioner.partition_for_key(partition_key)
+      end
+
+      message = Message.new(value, key: key, topic: topic, partition: partition)
+
+      @buffered_messages << message
+
+      message
     end
 
+    # Flushes all messages to the Kafka brokers.
+    #
+    # Depending on the value of `required_acks` used when initializing the producer,
+    # this call may block until the specified number of replicas have acknowledged
+    # the writes. The `timeout` setting places an upper bound on the amount of time
+    # the call will block before failing.
+    #
+    # @return [nil]
     def flush
       messages_for_broker = {}
 
@@ -37,7 +78,7 @@ module Kafka
       response = broker.produce(
         messages_for_topics: message_set.to_h,
         required_acks: @required_acks,
-        timeout: @timeout,
+        timeout: @timeout * 1000, # Kafka expects the timeout in milliseconds.
       )
 
       if response
@@ -50,6 +91,12 @@ module Kafka
       end
 
       @buffered_messages.clear
+
+      nil
+    end
+
+    def shutdown
+      @broker_pool.shutdown
     end
   end
 end
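The producer-facing upshot: `timeout` is now expressed in seconds (converted to milliseconds only at the protocol boundary), `write` no longer requires an explicit partition, and `shutdown` tears down the pooled connections. A hedged usage sketch building on the client example above — topic, keys, and values are placeholders:

    producer = kafka.get_producer(required_acks: 1, timeout: 10)

    # Messages sharing a partition_key are assigned to the same partition.
    producer.write("hello", key: "greeting1", topic: "greetings", partition_key: "user-42")
    producer.write("world", key: "greeting2", topic: "greetings", partition_key: "user-42")

    # Blocks until `required_acks` replicas acknowledge, bounded by `timeout`.
    producer.flush

    producer.shutdown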
data/lib/kafka/protocol.rb CHANGED
@@ -14,6 +14,7 @@ module Kafka
       when 5 then raise LeaderNotAvailable
       when 6 then raise NotLeaderForPartition
       when 7 then raise RequestTimedOut
+      when 9 then raise ReplicaNotAvailable
       else raise UnknownError, "Unknown error with code #{error_code}"
     end
   end
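Protocol error code 9 ("ReplicaNotAvailable") now maps to the new exception class rather than falling through to `UnknownError`, which is what lets the metadata code in broker.rb rescue it selectively. Roughly how a caller is expected to use it — the `partition` variable here is illustrative, echoing the broker.rb hunk:

    begin
      Kafka::Protocol.handle_error(partition.partition_error_code)
    rescue Kafka::ReplicaNotAvailable
      # Safe to ignore per the protocol specification; log and move on.
    end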
data/lib/kafka/protocol/metadata_response.rb CHANGED
@@ -116,6 +116,11 @@ module Kafka
       @brokers.find {|broker| broker.node_id == node_id }
     end
 
+    def partitions_for(topic_name)
+      topic = @topics.find {|t| t.topic_name == topic_name }
+      topic.partitions
+    end
+
     # Decodes a MetadataResponse from a {Decoder} containing response data.
     #
     # @param decoder [Decoder]
data/lib/kafka/protocol/request_message.rb CHANGED
@@ -1,7 +1,9 @@
 module Kafka
   module Protocol
     class RequestMessage
-      def initialize(api_key:, api_version:, correlation_id:, client_id:, request:)
+      API_VERSION = 0
+
+      def initialize(api_key:, api_version: API_VERSION, correlation_id:, client_id:, request:)
         @api_key = api_key
         @api_version = api_version
         @correlation_id = correlation_id
data/lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Kafka
-  VERSION = "0.1.0-alpha2"
+  VERSION = "0.1.0-beta1"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.0.pre.alpha2
+  version: 0.1.0.pre.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-01-21 00:00:00.000000000 Z
+date: 2016-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -78,6 +78,7 @@ files:
 - lib/kafka/connection.rb
 - lib/kafka/message.rb
 - lib/kafka/message_set.rb
+- lib/kafka/partitioner.rb
 - lib/kafka/producer.rb
 - lib/kafka/protocol.rb
 - lib/kafka/protocol/decoder.rb