ruby-kafka 0.1.0.pre.alpha2 → 0.1.0.pre.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -1
- data/lib/kafka.rb +3 -0
- data/lib/kafka/broker.rb +18 -11
- data/lib/kafka/broker_pool.rb +89 -28
- data/lib/kafka/client.rb +4 -2
- data/lib/kafka/connection.rb +31 -6
- data/lib/kafka/partitioner.rb +13 -0
- data/lib/kafka/producer.rb +52 -5
- data/lib/kafka/protocol.rb +1 -0
- data/lib/kafka/protocol/metadata_response.rb +5 -0
- data/lib/kafka/protocol/request_message.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb0de1c04f8551ffe3750b3eac702b51777e525d
|
4
|
+
data.tar.gz: 1e6ed1b055a754d7eb958a03314f50f62138de59
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ac63915c1bead98581a2b4ddd577887ec5fa7f8d7c14ebcedac3d77e06d24a718a1df3b88961edd13927acdb16e4992acdaaecd2e05dc7fe1e20c511a8ad6c1
|
7
|
+
data.tar.gz: 2273d4da86a6ee82c5c3cc310e1549056638e1c07839abcbbb511f9dc8cb055ed8680e4ee9b5415bbb00b21155c4955f66d7be385423e639c5c27e0e2b38f8c8
|
data/Gemfile
CHANGED
data/lib/kafka.rb
CHANGED
@@ -13,6 +13,9 @@ module Kafka
|
|
13
13
|
NotLeaderForPartition = Class.new(Error)
|
14
14
|
RequestTimedOut = Class.new(Error)
|
15
15
|
|
16
|
+
# Raised if a replica is expected on a broker, but is not present. Can be safely ignored.
|
17
|
+
ReplicaNotAvailable = Class.new(Error)
|
18
|
+
|
16
19
|
def self.new(**options)
|
17
20
|
Client.new(**options)
|
18
21
|
end
|
data/lib/kafka/broker.rb
CHANGED
@@ -4,21 +4,23 @@ require "kafka/protocol"
|
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
class Broker
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
host: host,
|
12
|
-
port: port,
|
13
|
-
client_id: client_id,
|
14
|
-
logger: logger
|
15
|
-
)
|
7
|
+
def self.connect(node_id: nil, logger:, **options)
|
8
|
+
connection = Connection.new(logger: logger, **options)
|
9
|
+
new(connection: connection, node_id: node_id, logger: logger)
|
10
|
+
end
|
16
11
|
|
12
|
+
def initialize(connection:, node_id: nil, logger:)
|
13
|
+
@connection = connection
|
14
|
+
@node_id = node_id
|
17
15
|
@logger = logger
|
18
16
|
end
|
19
17
|
|
20
18
|
def to_s
|
21
|
-
"#{@
|
19
|
+
"#{@connection} (node_id=#{@node_id.inspect})"
|
20
|
+
end
|
21
|
+
|
22
|
+
def disconnect
|
23
|
+
@connection.close
|
22
24
|
end
|
23
25
|
|
24
26
|
def fetch_metadata(**options)
|
@@ -32,7 +34,12 @@ module Kafka
|
|
32
34
|
Protocol.handle_error(topic.topic_error_code)
|
33
35
|
|
34
36
|
topic.partitions.each do |partition|
|
35
|
-
|
37
|
+
begin
|
38
|
+
Protocol.handle_error(partition.partition_error_code)
|
39
|
+
rescue ReplicaNotAvailable
|
40
|
+
# This error can be safely ignored per the protocol specification.
|
41
|
+
@logger.warn "Replica not available for topic #{topic.topic_name}, partition #{partition.partition_id}"
|
42
|
+
end
|
36
43
|
end
|
37
44
|
end
|
38
45
|
|
data/lib/kafka/broker_pool.rb
CHANGED
@@ -8,6 +8,12 @@ module Kafka
|
|
8
8
|
# partitions to the current leader for those partitions.
|
9
9
|
class BrokerPool
|
10
10
|
|
11
|
+
# The number of times to try to connect to a broker before giving up.
|
12
|
+
MAX_CONNECTION_ATTEMPTS = 3
|
13
|
+
|
14
|
+
# The backoff period between connection retries, in seconds.
|
15
|
+
RETRY_BACKOFF_TIMEOUT = 5
|
16
|
+
|
11
17
|
# Initializes a broker pool with a set of seed brokers.
|
12
18
|
#
|
13
19
|
# The pool will try to fetch cluster metadata from one of the brokers.
|
@@ -15,23 +21,99 @@ module Kafka
|
|
15
21
|
# @param seed_brokers [Array<String>]
|
16
22
|
# @param client_id [String]
|
17
23
|
# @param logger [Logger]
|
18
|
-
def initialize(seed_brokers:, client_id:, logger:)
|
24
|
+
def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
|
19
25
|
@client_id = client_id
|
20
26
|
@logger = logger
|
27
|
+
@socket_timeout = socket_timeout
|
21
28
|
@brokers = {}
|
29
|
+
@seed_brokers = seed_brokers
|
30
|
+
|
31
|
+
refresh
|
32
|
+
end
|
33
|
+
|
34
|
+
# Refreshes the cluster metadata.
|
35
|
+
#
|
36
|
+
# This is used to update the partition leadership information, among other things.
|
37
|
+
# This method will go through each node listed in `seed_brokers`, connecting to the
|
38
|
+
# first one that is available. This node will be queried for the cluster metadata.
|
39
|
+
#
|
40
|
+
# @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
|
41
|
+
# @return [nil]
|
42
|
+
def refresh
|
43
|
+
@seed_brokers.each do |node|
|
44
|
+
@logger.info "Trying to initialize broker pool from node #{node}"
|
45
|
+
|
46
|
+
begin
|
47
|
+
host, port = node.split(":", 2)
|
48
|
+
|
49
|
+
broker = Broker.connect(
|
50
|
+
host: host,
|
51
|
+
port: port.to_i,
|
52
|
+
client_id: @client_id,
|
53
|
+
socket_timeout: @socket_timeout,
|
54
|
+
logger: @logger,
|
55
|
+
)
|
56
|
+
|
57
|
+
@cluster_info = broker.fetch_metadata
|
58
|
+
|
59
|
+
@logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
|
22
60
|
|
23
|
-
|
61
|
+
return
|
62
|
+
rescue Error => e
|
63
|
+
@logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
raise ConnectionError, "Could not connect to any of the seed brokers: #{@seed_brokers.inspect}"
|
24
68
|
end
|
25
69
|
|
26
|
-
#
|
70
|
+
# Finds the broker acting as the leader of the given topic and partition and connects to it.
|
71
|
+
#
|
72
|
+
# Note that this call may take a considerable amount of time, since the cached cluster
|
73
|
+
# metadata may be out of date. In that case, the cluster needs to be re-discovered. This
|
74
|
+
# can happen when a broker becomes unavailable, which would trigger a leader election for
|
75
|
+
# the partitions previously owned by that broker. Since this can take some time, this method
|
76
|
+
# will retry up to `MAX_CONNECTION_ATTEMPTS` times, waiting `RETRY_BACKOFF_TIMEOUT` seconds
|
77
|
+
# between each attempt.
|
27
78
|
#
|
28
79
|
# @param topic [String]
|
29
80
|
# @param partition [Integer]
|
81
|
+
# @raise [ConnectionError] if it was not possible to connect to the leader.
|
30
82
|
# @return [Broker] the broker that's currently acting as leader of the partition.
|
31
83
|
def get_leader(topic, partition)
|
32
|
-
|
84
|
+
attempt = 0
|
85
|
+
|
86
|
+
begin
|
87
|
+
leader_id = @cluster_info.find_leader_id(topic, partition)
|
88
|
+
broker_for_id(leader_id)
|
89
|
+
rescue ConnectionError => e
|
90
|
+
@logger.error "Failed to connect to leader for topic `#{topic}`, partition #{partition}"
|
91
|
+
|
92
|
+
if attempt < MAX_CONNECTION_ATTEMPTS
|
93
|
+
attempt += 1
|
94
|
+
|
95
|
+
@logger.info "Rediscovering cluster and retrying"
|
33
96
|
|
34
|
-
|
97
|
+
sleep RETRY_BACKOFF_TIMEOUT
|
98
|
+
refresh
|
99
|
+
retry
|
100
|
+
else
|
101
|
+
@logger.error "Giving up trying to find leader for topic `#{topic}`, partition #{partition}"
|
102
|
+
|
103
|
+
raise e
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def partitions_for(topic)
|
109
|
+
@cluster_info.partitions_for(topic)
|
110
|
+
end
|
111
|
+
|
112
|
+
def shutdown
|
113
|
+
@brokers.each do |id, broker|
|
114
|
+
@logger.info "Disconnecting broker #{id}"
|
115
|
+
broker.disconnect
|
116
|
+
end
|
35
117
|
end
|
36
118
|
|
37
119
|
private
|
@@ -43,35 +125,14 @@ module Kafka
|
|
43
125
|
def connect_to_broker(broker_id)
|
44
126
|
broker_info = @cluster_info.find_broker(broker_id)
|
45
127
|
|
46
|
-
Broker.
|
128
|
+
Broker.connect(
|
47
129
|
host: broker_info.host,
|
48
130
|
port: broker_info.port,
|
49
131
|
node_id: broker_info.node_id,
|
50
132
|
client_id: @client_id,
|
133
|
+
socket_timeout: @socket_timeout,
|
51
134
|
logger: @logger,
|
52
135
|
)
|
53
136
|
end
|
54
|
-
|
55
|
-
def initialize_from_seed_brokers(seed_brokers)
|
56
|
-
seed_brokers.each do |node|
|
57
|
-
@logger.info "Trying to initialize broker pool from node #{node}"
|
58
|
-
|
59
|
-
begin
|
60
|
-
host, port = node.split(":", 2)
|
61
|
-
|
62
|
-
broker = Broker.new(host: host, port: port, client_id: @client_id, logger: @logger)
|
63
|
-
|
64
|
-
@cluster_info = broker.fetch_metadata
|
65
|
-
|
66
|
-
@logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
|
67
|
-
|
68
|
-
return
|
69
|
-
rescue Error => e
|
70
|
-
@logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
raise ConnectionError, "Could not connect to any of the seed brokers: #{seed_brokers.inspect}"
|
75
|
-
end
|
76
137
|
end
|
77
138
|
end
|
data/lib/kafka/client.rb
CHANGED
@@ -3,17 +3,19 @@ require "kafka/producer"
|
|
3
3
|
|
4
4
|
module Kafka
|
5
5
|
class Client
|
6
|
-
def initialize(seed_brokers:, client_id:, logger:)
|
6
|
+
def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
|
7
7
|
@seed_brokers = seed_brokers
|
8
8
|
@client_id = client_id
|
9
9
|
@logger = logger
|
10
|
+
@socket_timeout = socket_timeout
|
10
11
|
end
|
11
12
|
|
12
13
|
def get_producer(**options)
|
13
14
|
broker_pool = BrokerPool.new(
|
14
15
|
seed_brokers: @seed_brokers,
|
15
16
|
client_id: @client_id,
|
16
|
-
logger: @logger
|
17
|
+
logger: @logger,
|
18
|
+
socket_timeout: @socket_timeout,
|
17
19
|
)
|
18
20
|
|
19
21
|
Producer.new(broker_pool: broker_pool, logger: @logger, **options)
|
data/lib/kafka/connection.rb
CHANGED
@@ -11,7 +11,8 @@ module Kafka
|
|
11
11
|
# requests must be directed specifically to the broker that is currently leader for
|
12
12
|
# the set of topic partitions you want to produce to or consume from.
|
13
13
|
class Connection
|
14
|
-
|
14
|
+
SOCKET_TIMEOUT = 5
|
15
|
+
CONNECT_TIMEOUT = 10
|
15
16
|
|
16
17
|
# Opens a connection to a Kafka broker.
|
17
18
|
#
|
@@ -21,24 +22,33 @@ module Kafka
|
|
21
22
|
# request to help trace calls and should logically identify the application
|
22
23
|
# making the request.
|
23
24
|
# @param logger [Logger] the logger used to log trace messages.
|
25
|
+
# @param connect_timeout [Integer] the socket timeout for connecting to the broker.
|
26
|
+
# Default is 10 seconds.
|
27
|
+
# @param socket_timeout [Integer] the socket timeout for reading and writing to the
|
28
|
+
# broker. Default is 5 seconds.
|
24
29
|
#
|
25
30
|
# @return [Connection] a new connection.
|
26
|
-
def initialize(host:, port:, client_id:, logger:)
|
31
|
+
def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil)
|
27
32
|
@host, @port, @client_id = host, port, client_id
|
28
33
|
@logger = logger
|
29
34
|
|
35
|
+
@connect_timeout = connect_timeout || CONNECT_TIMEOUT
|
36
|
+
@socket_timeout = socket_timeout || SOCKET_TIMEOUT
|
37
|
+
|
30
38
|
@logger.info "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
|
31
39
|
|
32
|
-
@socket =
|
40
|
+
@socket = Socket.tcp(host, port, connect_timeout: @connect_timeout)
|
33
41
|
|
34
42
|
@encoder = Kafka::Protocol::Encoder.new(@socket)
|
35
43
|
@decoder = Kafka::Protocol::Decoder.new(@socket)
|
36
44
|
|
37
45
|
# Correlation id is initialized to zero and bumped for each request.
|
38
46
|
@correlation_id = 0
|
39
|
-
rescue
|
47
|
+
rescue Errno::ETIMEDOUT
|
48
|
+
@logger.error "Timed out while trying to connect to #{host}:#{port}: #{e}"
|
49
|
+
raise ConnectionError, e
|
50
|
+
rescue SocketError, Errno::ECONNREFUSED => e
|
40
51
|
@logger.error "Failed to connect to #{host}:#{port}: #{e}"
|
41
|
-
|
42
52
|
raise ConnectionError, e
|
43
53
|
end
|
44
54
|
|
@@ -46,6 +56,11 @@ module Kafka
|
|
46
56
|
"#{@host}:#{@port}"
|
47
57
|
end
|
48
58
|
|
59
|
+
def close
|
60
|
+
@logger.debug "Closing socket to #{to_s}"
|
61
|
+
@socket.close
|
62
|
+
end
|
63
|
+
|
49
64
|
# Sends a request over the connection.
|
50
65
|
#
|
51
66
|
# @param api_key [Integer] the integer code for the API that is invoked.
|
@@ -75,13 +90,18 @@ module Kafka
|
|
75
90
|
|
76
91
|
message = Kafka::Protocol::RequestMessage.new(
|
77
92
|
api_key: api_key,
|
78
|
-
api_version: API_VERSION,
|
79
93
|
correlation_id: @correlation_id,
|
80
94
|
client_id: @client_id,
|
81
95
|
request: request,
|
82
96
|
)
|
83
97
|
|
84
98
|
data = Kafka::Protocol::Encoder.encode_with(message)
|
99
|
+
|
100
|
+
unless IO.select(nil, [@socket], nil, @socket_timeout)
|
101
|
+
@logger.error "Timed out while writing request #{@correlation_id}"
|
102
|
+
raise ConnectionError
|
103
|
+
end
|
104
|
+
|
85
105
|
@encoder.write_bytes(data)
|
86
106
|
|
87
107
|
nil
|
@@ -96,6 +116,11 @@ module Kafka
|
|
96
116
|
def read_response(response_class)
|
97
117
|
@logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
|
98
118
|
|
119
|
+
unless IO.select([@socket], nil, nil, @socket_timeout)
|
120
|
+
@logger.error "Timed out while waiting for response #{@correlation_id}"
|
121
|
+
raise ConnectionError
|
122
|
+
end
|
123
|
+
|
99
124
|
bytes = @decoder.bytes
|
100
125
|
|
101
126
|
buffer = StringIO.new(bytes)
|
data/lib/kafka/producer.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require "kafka/message"
|
2
2
|
require "kafka/message_set"
|
3
|
+
require "kafka/partitioner"
|
3
4
|
|
4
5
|
module Kafka
|
5
6
|
class Producer
|
6
|
-
# @param timeout [Integer] The number of
|
7
|
+
# @param timeout [Integer] The number of seconds to wait for an
|
7
8
|
# acknowledgement from the broker before timing out.
|
8
9
|
# @param required_acks [Integer] The number of replicas that must acknowledge
|
9
10
|
# a write.
|
10
|
-
def initialize(broker_pool:, logger:, timeout:
|
11
|
+
def initialize(broker_pool:, logger:, timeout: 10, required_acks: 1)
|
11
12
|
@broker_pool = broker_pool
|
12
13
|
@logger = logger
|
13
14
|
@required_acks = required_acks
|
@@ -15,10 +16,50 @@ module Kafka
|
|
15
16
|
@buffered_messages = []
|
16
17
|
end
|
17
18
|
|
18
|
-
|
19
|
-
|
19
|
+
# Writes a message to the specified topic. Note that messages are buffered in
|
20
|
+
# the producer until {#flush} is called.
|
21
|
+
#
|
22
|
+
# == Partitioning
|
23
|
+
#
|
24
|
+
# There are several options for specifying the partition that the message should
|
25
|
+
# be written to. The simplest option is to not specify a partition or partition
|
26
|
+
# key, in which case the message key will be used to select one of the available
|
27
|
+
# partitions. You can also specify the `partition` parameter yourself. This
|
28
|
+
# requires you to know which partitions are available, however. Oftentimes the
|
29
|
+
# best option is to specify the `partition_key` parameter: messages with the
|
30
|
+
# same partition key will always be assigned to the same partition, as long as
|
31
|
+
# the number of partitions doesn't change.
|
32
|
+
#
|
33
|
+
# @param value [String] the message data.
|
34
|
+
# @param key [String] the message key.
|
35
|
+
# @param topic [String] the topic that the message should be written to.
|
36
|
+
# @param partition [Integer] the partition that the message should be written to.
|
37
|
+
# @param partition_key [String] the key that should be used to assign a partition.
|
38
|
+
#
|
39
|
+
# @return [Message] the message that was written.
|
40
|
+
def write(value, key:, topic:, partition: nil, partition_key: nil)
|
41
|
+
if partition.nil?
|
42
|
+
# If no explicit partition key is specified we use the message key instead.
|
43
|
+
partition_key ||= key
|
44
|
+
partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
|
45
|
+
partition = partitioner.partition_for_key(partition_key)
|
46
|
+
end
|
47
|
+
|
48
|
+
message = Message.new(value, key: key, topic: topic, partition: partition)
|
49
|
+
|
50
|
+
@buffered_messages << message
|
51
|
+
|
52
|
+
message
|
20
53
|
end
|
21
54
|
|
55
|
+
# Flushes all messages to the Kafka brokers.
|
56
|
+
#
|
57
|
+
# Depending on the value of `required_acks` used when initializing the producer,
|
58
|
+
# this call may block until the specified number of replicas have acknowledged
|
59
|
+
# the writes. The `timeout` setting places an upper bound on the amount of time
|
60
|
+
# the call will block before failing.
|
61
|
+
#
|
62
|
+
# @return [nil]
|
22
63
|
def flush
|
23
64
|
messages_for_broker = {}
|
24
65
|
|
@@ -37,7 +78,7 @@ module Kafka
|
|
37
78
|
response = broker.produce(
|
38
79
|
messages_for_topics: message_set.to_h,
|
39
80
|
required_acks: @required_acks,
|
40
|
-
timeout: @timeout,
|
81
|
+
timeout: @timeout * 1000, # Kafka expects the timeout in milliseconds.
|
41
82
|
)
|
42
83
|
|
43
84
|
if response
|
@@ -50,6 +91,12 @@ module Kafka
|
|
50
91
|
end
|
51
92
|
|
52
93
|
@buffered_messages.clear
|
94
|
+
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
98
|
+
def shutdown
|
99
|
+
@broker_pool.shutdown
|
53
100
|
end
|
54
101
|
end
|
55
102
|
end
|
data/lib/kafka/protocol.rb
CHANGED
data/lib/kafka/protocol/metadata_response.rb
CHANGED
@@ -116,6 +116,11 @@ module Kafka
|
|
116
116
|
@brokers.find {|broker| broker.node_id == node_id }
|
117
117
|
end
|
118
118
|
|
119
|
+
def partitions_for(topic_name)
|
120
|
+
topic = @topics.find {|t| t.topic_name == topic_name }
|
121
|
+
topic.partitions
|
122
|
+
end
|
123
|
+
|
119
124
|
# Decodes a MetadataResponse from a {Decoder} containing response data.
|
120
125
|
#
|
121
126
|
# @param decoder [Decoder]
|
data/lib/kafka/protocol/request_message.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module Kafka
|
2
2
|
module Protocol
|
3
3
|
class RequestMessage
|
4
|
-
|
4
|
+
API_VERSION = 0
|
5
|
+
|
6
|
+
def initialize(api_key:, api_version: API_VERSION, correlation_id:, client_id:, request:)
|
5
7
|
@api_key = api_key
|
6
8
|
@api_version = api_version
|
7
9
|
@correlation_id = correlation_id
|
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.pre.
|
4
|
+
version: 0.1.0.pre.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- lib/kafka/connection.rb
|
79
79
|
- lib/kafka/message.rb
|
80
80
|
- lib/kafka/message_set.rb
|
81
|
+
- lib/kafka/partitioner.rb
|
81
82
|
- lib/kafka/producer.rb
|
82
83
|
- lib/kafka/protocol.rb
|
83
84
|
- lib/kafka/protocol/decoder.rb
|