ruby-kafka 0.1.0.pre.alpha2 → 0.1.0.pre.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -1
- data/lib/kafka.rb +3 -0
- data/lib/kafka/broker.rb +18 -11
- data/lib/kafka/broker_pool.rb +89 -28
- data/lib/kafka/client.rb +4 -2
- data/lib/kafka/connection.rb +31 -6
- data/lib/kafka/partitioner.rb +13 -0
- data/lib/kafka/producer.rb +52 -5
- data/lib/kafka/protocol.rb +1 -0
- data/lib/kafka/protocol/metadata_response.rb +5 -0
- data/lib/kafka/protocol/request_message.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb0de1c04f8551ffe3750b3eac702b51777e525d
|
4
|
+
data.tar.gz: 1e6ed1b055a754d7eb958a03314f50f62138de59
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ac63915c1bead98581a2b4ddd577887ec5fa7f8d7c14ebcedac3d77e06d24a718a1df3b88961edd13927acdb16e4992acdaaecd2e05dc7fe1e20c511a8ad6c1
|
7
|
+
data.tar.gz: 2273d4da86a6ee82c5c3cc310e1549056638e1c07839abcbbb511f9dc8cb055ed8680e4ee9b5415bbb00b21155c4955f66d7be385423e639c5c27e0e2b38f8c8
|
data/Gemfile
CHANGED
data/lib/kafka.rb
CHANGED
@@ -13,6 +13,9 @@ module Kafka
|
|
13
13
|
NotLeaderForPartition = Class.new(Error)
|
14
14
|
RequestTimedOut = Class.new(Error)
|
15
15
|
|
16
|
+
# Raised if a replica is expected on a broker, but is not. Can be safely ignored.
|
17
|
+
ReplicaNotAvailable = Class.new(Error)
|
18
|
+
|
16
19
|
def self.new(**options)
|
17
20
|
Client.new(**options)
|
18
21
|
end
|
data/lib/kafka/broker.rb
CHANGED
@@ -4,21 +4,23 @@ require "kafka/protocol"
|
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
class Broker
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
host: host,
|
12
|
-
port: port,
|
13
|
-
client_id: client_id,
|
14
|
-
logger: logger
|
15
|
-
)
|
7
|
+
def self.connect(node_id: nil, logger:, **options)
|
8
|
+
connection = Connection.new(logger: logger, **options)
|
9
|
+
new(connection: connection, node_id: node_id, logger: logger)
|
10
|
+
end
|
16
11
|
|
12
|
+
def initialize(connection:, node_id: nil, logger:)
|
13
|
+
@connection = connection
|
14
|
+
@node_id = node_id
|
17
15
|
@logger = logger
|
18
16
|
end
|
19
17
|
|
20
18
|
def to_s
|
21
|
-
"#{@
|
19
|
+
"#{@connection} (node_id=#{@node_id.inspect})"
|
20
|
+
end
|
21
|
+
|
22
|
+
def disconnect
|
23
|
+
@connection.close
|
22
24
|
end
|
23
25
|
|
24
26
|
def fetch_metadata(**options)
|
@@ -32,7 +34,12 @@ module Kafka
|
|
32
34
|
Protocol.handle_error(topic.topic_error_code)
|
33
35
|
|
34
36
|
topic.partitions.each do |partition|
|
35
|
-
|
37
|
+
begin
|
38
|
+
Protocol.handle_error(partition.partition_error_code)
|
39
|
+
rescue ReplicaNotAvailable
|
40
|
+
# This error can be safely ignored per the protocol specification.
|
41
|
+
@logger.warn "Replica not available for topic #{topic.topic_name}, partition #{partition.partition_id}"
|
42
|
+
end
|
36
43
|
end
|
37
44
|
end
|
38
45
|
|
data/lib/kafka/broker_pool.rb
CHANGED
@@ -8,6 +8,12 @@ module Kafka
|
|
8
8
|
# partitions to the current leader for those partitions.
|
9
9
|
class BrokerPool
|
10
10
|
|
11
|
+
# The number of times to try to connect to a broker before giving up.
|
12
|
+
MAX_CONNECTION_ATTEMPTS = 3
|
13
|
+
|
14
|
+
# The backoff period between connection retries, in seconds.
|
15
|
+
RETRY_BACKOFF_TIMEOUT = 5
|
16
|
+
|
11
17
|
# Initializes a broker pool with a set of seed brokers.
|
12
18
|
#
|
13
19
|
# The pool will try to fetch cluster metadata from one of the brokers.
|
@@ -15,23 +21,99 @@ module Kafka
|
|
15
21
|
# @param seed_brokers [Array<String>]
|
16
22
|
# @param client_id [String]
|
17
23
|
# @param logger [Logger]
|
18
|
-
def initialize(seed_brokers:, client_id:, logger:)
|
24
|
+
def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
|
19
25
|
@client_id = client_id
|
20
26
|
@logger = logger
|
27
|
+
@socket_timeout = socket_timeout
|
21
28
|
@brokers = {}
|
29
|
+
@seed_brokers = seed_brokers
|
30
|
+
|
31
|
+
refresh
|
32
|
+
end
|
33
|
+
|
34
|
+
# Refreshes the cluster metadata.
|
35
|
+
#
|
36
|
+
# This is used to update the partition leadership information, among other things.
|
37
|
+
# The method will go through each node listed in `seed_brokers`, connecting to the
|
38
|
+
# first one that is available. This node will be queried for the cluster metadata.
|
39
|
+
#
|
40
|
+
# @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
|
41
|
+
# @return [nil]
|
42
|
+
def refresh
|
43
|
+
@seed_brokers.each do |node|
|
44
|
+
@logger.info "Trying to initialize broker pool from node #{node}"
|
45
|
+
|
46
|
+
begin
|
47
|
+
host, port = node.split(":", 2)
|
48
|
+
|
49
|
+
broker = Broker.connect(
|
50
|
+
host: host,
|
51
|
+
port: port.to_i,
|
52
|
+
client_id: @client_id,
|
53
|
+
socket_timeout: @socket_timeout,
|
54
|
+
logger: @logger,
|
55
|
+
)
|
56
|
+
|
57
|
+
@cluster_info = broker.fetch_metadata
|
58
|
+
|
59
|
+
@logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
|
22
60
|
|
23
|
-
|
61
|
+
return
|
62
|
+
rescue Error => e
|
63
|
+
@logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
raise ConnectionError, "Could not connect to any of the seed brokers: #{@seed_brokers.inspect}"
|
24
68
|
end
|
25
69
|
|
26
|
-
#
|
70
|
+
# Finds the broker acting as the leader of the given topic and partition and connects to it.
|
71
|
+
#
|
72
|
+
# Note that this call may take a considerable amount of time, since the cached cluster
|
73
|
+
# metadata may be out of date. In that case, the cluster needs to be re-discovered. This
|
74
|
+
# can happen when a broker becomes unavailable, which would trigger a leader election for
|
75
|
+
# the partitions previously owned by that broker. Since this can take some time, this method
|
76
|
+
# will retry up to `MAX_CONNECTION_ATTEMPTS` times, waiting `RETRY_BACKOFF_TIMEOUT` seconds
|
77
|
+
# between each attempt.
|
27
78
|
#
|
28
79
|
# @param topic [String]
|
29
80
|
# @param partition [Integer]
|
81
|
+
# @raise [ConnectionError] if it was not possible to connect to the leader.
|
30
82
|
# @return [Broker] the broker that's currently acting as leader of the partition.
|
31
83
|
def get_leader(topic, partition)
|
32
|
-
|
84
|
+
attempt = 0
|
85
|
+
|
86
|
+
begin
|
87
|
+
leader_id = @cluster_info.find_leader_id(topic, partition)
|
88
|
+
broker_for_id(leader_id)
|
89
|
+
rescue ConnectionError => e
|
90
|
+
@logger.error "Failed to connect to leader for topic `#{topic}`, partition #{partition}"
|
91
|
+
|
92
|
+
if attempt < MAX_CONNECTION_ATTEMPTS
|
93
|
+
attempt += 1
|
94
|
+
|
95
|
+
@logger.info "Rediscovering cluster and retrying"
|
33
96
|
|
34
|
-
|
97
|
+
sleep RETRY_BACKOFF_TIMEOUT
|
98
|
+
refresh
|
99
|
+
retry
|
100
|
+
else
|
101
|
+
@logger.error "Giving up trying to find leader for topic `#{topic}`, partition #{partition}"
|
102
|
+
|
103
|
+
raise e
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def partitions_for(topic)
|
109
|
+
@cluster_info.partitions_for(topic)
|
110
|
+
end
|
111
|
+
|
112
|
+
def shutdown
|
113
|
+
@brokers.each do |id, broker|
|
114
|
+
@logger.info "Disconnecting broker #{id}"
|
115
|
+
broker.disconnect
|
116
|
+
end
|
35
117
|
end
|
36
118
|
|
37
119
|
private
|
@@ -43,35 +125,14 @@ module Kafka
|
|
43
125
|
def connect_to_broker(broker_id)
|
44
126
|
broker_info = @cluster_info.find_broker(broker_id)
|
45
127
|
|
46
|
-
Broker.
|
128
|
+
Broker.connect(
|
47
129
|
host: broker_info.host,
|
48
130
|
port: broker_info.port,
|
49
131
|
node_id: broker_info.node_id,
|
50
132
|
client_id: @client_id,
|
133
|
+
socket_timeout: @socket_timeout,
|
51
134
|
logger: @logger,
|
52
135
|
)
|
53
136
|
end
|
54
|
-
|
55
|
-
def initialize_from_seed_brokers(seed_brokers)
|
56
|
-
seed_brokers.each do |node|
|
57
|
-
@logger.info "Trying to initialize broker pool from node #{node}"
|
58
|
-
|
59
|
-
begin
|
60
|
-
host, port = node.split(":", 2)
|
61
|
-
|
62
|
-
broker = Broker.new(host: host, port: port, client_id: @client_id, logger: @logger)
|
63
|
-
|
64
|
-
@cluster_info = broker.fetch_metadata
|
65
|
-
|
66
|
-
@logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
|
67
|
-
|
68
|
-
return
|
69
|
-
rescue Error => e
|
70
|
-
@logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
raise ConnectionError, "Could not connect to any of the seed brokers: #{seed_brokers.inspect}"
|
75
|
-
end
|
76
137
|
end
|
77
138
|
end
|
data/lib/kafka/client.rb
CHANGED
@@ -3,17 +3,19 @@ require "kafka/producer"
|
|
3
3
|
|
4
4
|
module Kafka
|
5
5
|
class Client
|
6
|
-
def initialize(seed_brokers:, client_id:, logger:)
|
6
|
+
def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
|
7
7
|
@seed_brokers = seed_brokers
|
8
8
|
@client_id = client_id
|
9
9
|
@logger = logger
|
10
|
+
@socket_timeout = socket_timeout
|
10
11
|
end
|
11
12
|
|
12
13
|
def get_producer(**options)
|
13
14
|
broker_pool = BrokerPool.new(
|
14
15
|
seed_brokers: @seed_brokers,
|
15
16
|
client_id: @client_id,
|
16
|
-
logger: @logger
|
17
|
+
logger: @logger,
|
18
|
+
socket_timeout: @socket_timeout,
|
17
19
|
)
|
18
20
|
|
19
21
|
Producer.new(broker_pool: broker_pool, logger: @logger, **options)
|
data/lib/kafka/connection.rb
CHANGED
@@ -11,7 +11,8 @@ module Kafka
|
|
11
11
|
# requests must be directed specifically to the broker that is currently leader for
|
12
12
|
# the set of topic partitions you want to produce to or consume from.
|
13
13
|
class Connection
|
14
|
-
|
14
|
+
SOCKET_TIMEOUT = 5
|
15
|
+
CONNECT_TIMEOUT = 10
|
15
16
|
|
16
17
|
# Opens a connection to a Kafka broker.
|
17
18
|
#
|
@@ -21,24 +22,33 @@ module Kafka
|
|
21
22
|
# request to help trace calls and should logically identify the application
|
22
23
|
# making the request.
|
23
24
|
# @param logger [Logger] the logger used to log trace messages.
|
25
|
+
# @param connect_timeout [Integer] the socket timeout for connecting to the broker.
|
26
|
+
# Default is 10 seconds.
|
27
|
+
# @param socket_timeout [Integer] the socket timeout for reading and writing to the
|
28
|
+
# broker. Default is 5 seconds.
|
24
29
|
#
|
25
30
|
# @return [Connection] a new connection.
|
26
|
-
def initialize(host:, port:, client_id:, logger:)
|
31
|
+
def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil)
|
27
32
|
@host, @port, @client_id = host, port, client_id
|
28
33
|
@logger = logger
|
29
34
|
|
35
|
+
@connect_timeout = connect_timeout || CONNECT_TIMEOUT
|
36
|
+
@socket_timeout = socket_timeout || SOCKET_TIMEOUT
|
37
|
+
|
30
38
|
@logger.info "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
|
31
39
|
|
32
|
-
@socket =
|
40
|
+
@socket = Socket.tcp(host, port, connect_timeout: @connect_timeout)
|
33
41
|
|
34
42
|
@encoder = Kafka::Protocol::Encoder.new(@socket)
|
35
43
|
@decoder = Kafka::Protocol::Decoder.new(@socket)
|
36
44
|
|
37
45
|
# Correlation id is initialized to zero and bumped for each request.
|
38
46
|
@correlation_id = 0
|
39
|
-
rescue
|
47
|
+
rescue Errno::ETIMEDOUT
|
48
|
+
@logger.error "Timed out while trying to connect to #{host}:#{port}: #{e}"
|
49
|
+
raise ConnectionError, e
|
50
|
+
rescue SocketError, Errno::ECONNREFUSED => e
|
40
51
|
@logger.error "Failed to connect to #{host}:#{port}: #{e}"
|
41
|
-
|
42
52
|
raise ConnectionError, e
|
43
53
|
end
|
44
54
|
|
@@ -46,6 +56,11 @@ module Kafka
|
|
46
56
|
"#{@host}:#{@port}"
|
47
57
|
end
|
48
58
|
|
59
|
+
def close
|
60
|
+
@logger.debug "Closing socket to #{to_s}"
|
61
|
+
@socket.close
|
62
|
+
end
|
63
|
+
|
49
64
|
# Sends a request over the connection.
|
50
65
|
#
|
51
66
|
# @param api_key [Integer] the integer code for the API that is invoked.
|
@@ -75,13 +90,18 @@ module Kafka
|
|
75
90
|
|
76
91
|
message = Kafka::Protocol::RequestMessage.new(
|
77
92
|
api_key: api_key,
|
78
|
-
api_version: API_VERSION,
|
79
93
|
correlation_id: @correlation_id,
|
80
94
|
client_id: @client_id,
|
81
95
|
request: request,
|
82
96
|
)
|
83
97
|
|
84
98
|
data = Kafka::Protocol::Encoder.encode_with(message)
|
99
|
+
|
100
|
+
unless IO.select(nil, [@socket], nil, @socket_timeout)
|
101
|
+
@logger.error "Timed out while writing request #{@correlation_id}"
|
102
|
+
raise ConnectionError
|
103
|
+
end
|
104
|
+
|
85
105
|
@encoder.write_bytes(data)
|
86
106
|
|
87
107
|
nil
|
@@ -96,6 +116,11 @@ module Kafka
|
|
96
116
|
def read_response(response_class)
|
97
117
|
@logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
|
98
118
|
|
119
|
+
unless IO.select([@socket], nil, nil, @socket_timeout)
|
120
|
+
@logger.error "Timed out while waiting for response #{@correlation_id}"
|
121
|
+
raise ConnectionError
|
122
|
+
end
|
123
|
+
|
99
124
|
bytes = @decoder.bytes
|
100
125
|
|
101
126
|
buffer = StringIO.new(bytes)
|
data/lib/kafka/producer.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require "kafka/message"
|
2
2
|
require "kafka/message_set"
|
3
|
+
require "kafka/partitioner"
|
3
4
|
|
4
5
|
module Kafka
|
5
6
|
class Producer
|
6
|
-
# @param timeout [Integer] The number of
|
7
|
+
# @param timeout [Integer] The number of seconds to wait for an
|
7
8
|
# acknowledgement from the broker before timing out.
|
8
9
|
# @param required_acks [Integer] The number of replicas that must acknowledge
|
9
10
|
# a write.
|
10
|
-
def initialize(broker_pool:, logger:, timeout:
|
11
|
+
def initialize(broker_pool:, logger:, timeout: 10, required_acks: 1)
|
11
12
|
@broker_pool = broker_pool
|
12
13
|
@logger = logger
|
13
14
|
@required_acks = required_acks
|
@@ -15,10 +16,50 @@ module Kafka
|
|
15
16
|
@buffered_messages = []
|
16
17
|
end
|
17
18
|
|
18
|
-
|
19
|
-
|
19
|
+
# Writes a message to the specified topic. Note that messages are buffered in
|
20
|
+
# the producer until {#flush} is called.
|
21
|
+
#
|
22
|
+
# == Partitioning
|
23
|
+
#
|
24
|
+
# There are several options for specifying the partition that the message should
|
25
|
+
# be written to. The simplest option is to not specify a partition or partition
|
26
|
+
# key, in which case the message key will be used to select one of the available
|
27
|
+
# partitions. You can also specify the `partition` parameter yourself. This
|
28
|
+
# requires you to know which partitions are available, however. Oftentimes the
|
29
|
+
# best option is to specify the `partition_key` parameter: messages with the
|
30
|
+
# same partition key will always be assigned to the same partition, as long as
|
31
|
+
# the number of partitions doesn't change.
|
32
|
+
#
|
33
|
+
# @param value [String] the message data.
|
34
|
+
# @param key [String] the message key.
|
35
|
+
# @param topic [String] the topic that the message should be written to.
|
36
|
+
# @param partition [Integer] the partition that the message should be written to.
|
37
|
+
# @param partition_key [String] the key that should be used to assign a partition.
|
38
|
+
#
|
39
|
+
# @return [Message] the message that was written.
|
40
|
+
def write(value, key:, topic:, partition: nil, partition_key: nil)
|
41
|
+
if partition.nil?
|
42
|
+
# If no explicit partition key is specified we use the message key instead.
|
43
|
+
partition_key ||= key
|
44
|
+
partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
|
45
|
+
partition = partitioner.partition_for_key(partition_key)
|
46
|
+
end
|
47
|
+
|
48
|
+
message = Message.new(value, key: key, topic: topic, partition: partition)
|
49
|
+
|
50
|
+
@buffered_messages << message
|
51
|
+
|
52
|
+
message
|
20
53
|
end
|
21
54
|
|
55
|
+
# Flushes all messages to the Kafka brokers.
|
56
|
+
#
|
57
|
+
# Depending on the value of `required_acks` used when initializing the producer,
|
58
|
+
# this call may block until the specified number of replicas have acknowledged
|
59
|
+
# the writes. The `timeout` setting places an upper bound on the amount of time
|
60
|
+
# the call will block before failing.
|
61
|
+
#
|
62
|
+
# @return [nil]
|
22
63
|
def flush
|
23
64
|
messages_for_broker = {}
|
24
65
|
|
@@ -37,7 +78,7 @@ module Kafka
|
|
37
78
|
response = broker.produce(
|
38
79
|
messages_for_topics: message_set.to_h,
|
39
80
|
required_acks: @required_acks,
|
40
|
-
timeout: @timeout,
|
81
|
+
timeout: @timeout * 1000, # Kafka expects the timeout in milliseconds.
|
41
82
|
)
|
42
83
|
|
43
84
|
if response
|
@@ -50,6 +91,12 @@ module Kafka
|
|
50
91
|
end
|
51
92
|
|
52
93
|
@buffered_messages.clear
|
94
|
+
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
98
|
+
def shutdown
|
99
|
+
@broker_pool.shutdown
|
53
100
|
end
|
54
101
|
end
|
55
102
|
end
|
data/lib/kafka/protocol.rb
CHANGED
data/lib/kafka/protocol/metadata_response.rb
CHANGED
@@ -116,6 +116,11 @@ module Kafka
|
|
116
116
|
@brokers.find {|broker| broker.node_id == node_id }
|
117
117
|
end
|
118
118
|
|
119
|
+
def partitions_for(topic_name)
|
120
|
+
topic = @topics.find {|t| t.topic_name == topic_name }
|
121
|
+
topic.partitions
|
122
|
+
end
|
123
|
+
|
119
124
|
# Decodes a MetadataResponse from a {Decoder} containing response data.
|
120
125
|
#
|
121
126
|
# @param decoder [Decoder]
|
data/lib/kafka/protocol/request_message.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module Kafka
|
2
2
|
module Protocol
|
3
3
|
class RequestMessage
|
4
|
-
|
4
|
+
API_VERSION = 0
|
5
|
+
|
6
|
+
def initialize(api_key:, api_version: API_VERSION, correlation_id:, client_id:, request:)
|
5
7
|
@api_key = api_key
|
6
8
|
@api_version = api_version
|
7
9
|
@correlation_id = correlation_id
|
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.pre.
|
4
|
+
version: 0.1.0.pre.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- lib/kafka/connection.rb
|
79
79
|
- lib/kafka/message.rb
|
80
80
|
- lib/kafka/message_set.rb
|
81
|
+
- lib/kafka/partitioner.rb
|
81
82
|
- lib/kafka/producer.rb
|
82
83
|
- lib/kafka/protocol.rb
|
83
84
|
- lib/kafka/protocol/decoder.rb
|