ruby-kafka 0.1.0.pre.alpha2 → 0.1.0.pre.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 945e87a87fcfebd2808de4203027613846a2f7ad
4
- data.tar.gz: f43c576a56aea2f49ef1047aec56f376ec2be0b0
3
+ metadata.gz: eb0de1c04f8551ffe3750b3eac702b51777e525d
4
+ data.tar.gz: 1e6ed1b055a754d7eb958a03314f50f62138de59
5
5
  SHA512:
6
- metadata.gz: c51ccf72b3822a773d013c68fe77665d7d5e9b4021131cb97d8bed65e47c681be18d89f718b63840ef5c463991b0c56c042b3ab747753c23b7f88c0b6131d8d3
7
- data.tar.gz: 4c5c41e3858562fcdac0540652f5d3d8f21334a841b717f7f6281f1ff15a58c3ba165f518fbce9778669d5d35279e5242bb3ca3d1afce62a5c40c01b790e1720
6
+ metadata.gz: 5ac63915c1bead98581a2b4ddd577887ec5fa7f8d7c14ebcedac3d77e06d24a718a1df3b88961edd13927acdb16e4992acdaaecd2e05dc7fe1e20c511a8ad6c1
7
+ data.tar.gz: 2273d4da86a6ee82c5c3cc310e1549056638e1c07839abcbbb511f9dc8cb055ed8680e4ee9b5415bbb00b21155c4955f66d7be385423e639c5c27e0e2b38f8c8
data/Gemfile CHANGED
@@ -1,6 +1,8 @@
1
- source 'https://rubygems.org'
1
+ source "https://rubygems.org"
2
+ ruby "2.2.3"
2
3
 
3
4
  # Specify your gem's dependencies in kafka.gemspec
4
5
  gemspec
5
6
 
6
7
  gem "dotenv"
8
+ gem "docker-api"
@@ -13,6 +13,9 @@ module Kafka
13
13
  NotLeaderForPartition = Class.new(Error)
14
14
  RequestTimedOut = Class.new(Error)
15
15
 
16
+ # Raised if a replica is expected on a broker, but is not. Can be safely ignored.
17
+ ReplicaNotAvailable = Class.new(Error)
18
+
16
19
  def self.new(**options)
17
20
  Client.new(**options)
18
21
  end
@@ -4,21 +4,23 @@ require "kafka/protocol"
4
4
 
5
5
  module Kafka
6
6
  class Broker
7
- def initialize(host:, port:, node_id: nil, client_id:, logger:)
8
- @host, @port, @node_id = host, port, node_id
9
-
10
- @connection = Connection.new(
11
- host: host,
12
- port: port,
13
- client_id: client_id,
14
- logger: logger
15
- )
7
+ def self.connect(node_id: nil, logger:, **options)
8
+ connection = Connection.new(logger: logger, **options)
9
+ new(connection: connection, node_id: node_id, logger: logger)
10
+ end
16
11
 
12
+ def initialize(connection:, node_id: nil, logger:)
13
+ @connection = connection
14
+ @node_id = node_id
17
15
  @logger = logger
18
16
  end
19
17
 
20
18
  def to_s
21
- "#{@host}:#{@port} (node_id=#{@node_id.inspect})"
19
+ "#{@connection} (node_id=#{@node_id.inspect})"
20
+ end
21
+
22
+ def disconnect
23
+ @connection.close
22
24
  end
23
25
 
24
26
  def fetch_metadata(**options)
@@ -32,7 +34,12 @@ module Kafka
32
34
  Protocol.handle_error(topic.topic_error_code)
33
35
 
34
36
  topic.partitions.each do |partition|
35
- Protocol.handle_error(partition.partition_error_code)
37
+ begin
38
+ Protocol.handle_error(partition.partition_error_code)
39
+ rescue ReplicaNotAvailable
40
+ # This error can be safely ignored per the protocol specification.
41
+ @logger.warn "Replica not available for topic #{topic.topic_name}, partition #{partition.partition_id}"
42
+ end
36
43
  end
37
44
  end
38
45
 
@@ -8,6 +8,12 @@ module Kafka
8
8
  # partitions to the current leader for those partitions.
9
9
  class BrokerPool
10
10
 
11
+ # The number of times to try to connect to a broker before giving up.
12
+ MAX_CONNECTION_ATTEMPTS = 3
13
+
14
+ # The backoff period between connection retries, in seconds.
15
+ RETRY_BACKOFF_TIMEOUT = 5
16
+
11
17
  # Initializes a broker pool with a set of seed brokers.
12
18
  #
13
19
  # The pool will try to fetch cluster metadata from one of the brokers.
@@ -15,23 +21,99 @@ module Kafka
15
21
  # @param seed_brokers [Array<String>]
16
22
  # @param client_id [String]
17
23
  # @param logger [Logger]
18
- def initialize(seed_brokers:, client_id:, logger:)
24
+ def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
19
25
  @client_id = client_id
20
26
  @logger = logger
27
+ @socket_timeout = socket_timeout
21
28
  @brokers = {}
29
+ @seed_brokers = seed_brokers
30
+
31
+ refresh
32
+ end
33
+
34
+ # Refreshes the cluster metadata.
35
+ #
36
+ # This is used to update the partition leadership information, among other things.
37
+ # The method will go through each node listed in `seed_brokers`, connecting to the
38
+ # first one that is available. This node will be queried for the cluster metadata.
39
+ #
40
+ # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
41
+ # @return [nil]
42
+ def refresh
43
+ @seed_brokers.each do |node|
44
+ @logger.info "Trying to initialize broker pool from node #{node}"
45
+
46
+ begin
47
+ host, port = node.split(":", 2)
48
+
49
+ broker = Broker.connect(
50
+ host: host,
51
+ port: port.to_i,
52
+ client_id: @client_id,
53
+ socket_timeout: @socket_timeout,
54
+ logger: @logger,
55
+ )
56
+
57
+ @cluster_info = broker.fetch_metadata
58
+
59
+ @logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
22
60
 
23
- initialize_from_seed_brokers(seed_brokers)
61
+ return
62
+ rescue Error => e
63
+ @logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
64
+ end
65
+ end
66
+
67
+ raise ConnectionError, "Could not connect to any of the seed brokers: #{@seed_brokers.inspect}"
24
68
  end
25
69
 
26
- # Gets the leader of the given topic and partition.
70
+ # Finds the broker acting as the leader of the given topic and partition and connects to it.
71
+ #
72
+ # Note that this call may take a considerable amount of time, since the cached cluster
73
+ # metadata may be out of date. In that case, the cluster needs to be re-discovered. This
74
+ # can happen when a broker becomes unavailable, which would trigger a leader election for
75
+ # the partitions previously owned by that broker. Since this can take some time, this method
76
+ # will retry up to `MAX_CONNECTION_ATTEMPTS` times, waiting `RETRY_BACKOFF_TIMEOUT` seconds
77
+ # between each attempt.
27
78
  #
28
79
  # @param topic [String]
29
80
  # @param partition [Integer]
81
+ # @raise [ConnectionError] if it was not possible to connect to the leader.
30
82
  # @return [Broker] the broker that's currently acting as leader of the partition.
31
83
  def get_leader(topic, partition)
32
- leader_id = @cluster_info.find_leader_id(topic, partition)
84
+ attempt = 0
85
+
86
+ begin
87
+ leader_id = @cluster_info.find_leader_id(topic, partition)
88
+ broker_for_id(leader_id)
89
+ rescue ConnectionError => e
90
+ @logger.error "Failed to connect to leader for topic `#{topic}`, partition #{partition}"
91
+
92
+ if attempt < MAX_CONNECTION_ATTEMPTS
93
+ attempt += 1
94
+
95
+ @logger.info "Rediscovering cluster and retrying"
33
96
 
34
- broker_for_id(leader_id)
97
+ sleep RETRY_BACKOFF_TIMEOUT
98
+ refresh
99
+ retry
100
+ else
101
+ @logger.error "Giving up trying to find leader for topic `#{topic}`, partition #{partition}"
102
+
103
+ raise e
104
+ end
105
+ end
106
+ end
107
+
108
+ def partitions_for(topic)
109
+ @cluster_info.partitions_for(topic)
110
+ end
111
+
112
+ def shutdown
113
+ @brokers.each do |id, broker|
114
+ @logger.info "Disconnecting broker #{id}"
115
+ broker.disconnect
116
+ end
35
117
  end
36
118
 
37
119
  private
@@ -43,35 +125,14 @@ module Kafka
43
125
  def connect_to_broker(broker_id)
44
126
  broker_info = @cluster_info.find_broker(broker_id)
45
127
 
46
- Broker.new(
128
+ Broker.connect(
47
129
  host: broker_info.host,
48
130
  port: broker_info.port,
49
131
  node_id: broker_info.node_id,
50
132
  client_id: @client_id,
133
+ socket_timeout: @socket_timeout,
51
134
  logger: @logger,
52
135
  )
53
136
  end
54
-
55
- def initialize_from_seed_brokers(seed_brokers)
56
- seed_brokers.each do |node|
57
- @logger.info "Trying to initialize broker pool from node #{node}"
58
-
59
- begin
60
- host, port = node.split(":", 2)
61
-
62
- broker = Broker.new(host: host, port: port, client_id: @client_id, logger: @logger)
63
-
64
- @cluster_info = broker.fetch_metadata
65
-
66
- @logger.info "Initialized broker pool with brokers: #{@cluster_info.brokers.inspect}"
67
-
68
- return
69
- rescue Error => e
70
- @logger.error "Failed to fetch metadata from broker #{broker}: #{e}"
71
- end
72
- end
73
-
74
- raise ConnectionError, "Could not connect to any of the seed brokers: #{seed_brokers.inspect}"
75
- end
76
137
  end
77
138
  end
@@ -3,17 +3,19 @@ require "kafka/producer"
3
3
 
4
4
  module Kafka
5
5
  class Client
6
- def initialize(seed_brokers:, client_id:, logger:)
6
+ def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
7
7
  @seed_brokers = seed_brokers
8
8
  @client_id = client_id
9
9
  @logger = logger
10
+ @socket_timeout = socket_timeout
10
11
  end
11
12
 
12
13
  def get_producer(**options)
13
14
  broker_pool = BrokerPool.new(
14
15
  seed_brokers: @seed_brokers,
15
16
  client_id: @client_id,
16
- logger: @logger
17
+ logger: @logger,
18
+ socket_timeout: @socket_timeout,
17
19
  )
18
20
 
19
21
  Producer.new(broker_pool: broker_pool, logger: @logger, **options)
@@ -11,7 +11,8 @@ module Kafka
11
11
  # requests must be directed specifically to the broker that is currently leader for
12
12
  # the set of topic partitions you want to produce to or consumer from.
13
13
  class Connection
14
- API_VERSION = 0
14
+ SOCKET_TIMEOUT = 5
15
+ CONNECT_TIMEOUT = 10
15
16
 
16
17
  # Opens a connection to a Kafka broker.
17
18
  #
@@ -21,24 +22,33 @@ module Kafka
21
22
  # request to help trace calls and should logically identify the application
22
23
  # making the request.
23
24
  # @param logger [Logger] the logger used to log trace messages.
25
+ # @param connect_timeout [Integer] the socket timeout for connecting to the broker.
26
+ # Default is 10 seconds.
27
+ # @param socket_timeout [Integer] the socket timeout for reading and writing to the
28
+ # broker. Default is 5 seconds.
24
29
  #
25
30
  # @return [Connection] a new connection.
26
- def initialize(host:, port:, client_id:, logger:)
31
+ def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil)
27
32
  @host, @port, @client_id = host, port, client_id
28
33
  @logger = logger
29
34
 
35
+ @connect_timeout = connect_timeout || CONNECT_TIMEOUT
36
+ @socket_timeout = socket_timeout || SOCKET_TIMEOUT
37
+
30
38
  @logger.info "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
31
39
 
32
- @socket = TCPSocket.new(host, port)
40
+ @socket = Socket.tcp(host, port, connect_timeout: @connect_timeout)
33
41
 
34
42
  @encoder = Kafka::Protocol::Encoder.new(@socket)
35
43
  @decoder = Kafka::Protocol::Decoder.new(@socket)
36
44
 
37
45
  # Correlation id is initialized to zero and bumped for each request.
38
46
  @correlation_id = 0
39
- rescue SocketError => e
47
+ rescue Errno::ETIMEDOUT => e
48
+ @logger.error "Timed out while trying to connect to #{host}:#{port}: #{e}"
49
+ raise ConnectionError, e
50
+ rescue SocketError, Errno::ECONNREFUSED => e
40
51
  @logger.error "Failed to connect to #{host}:#{port}: #{e}"
41
-
42
52
  raise ConnectionError, e
43
53
  end
44
54
 
@@ -46,6 +56,11 @@ module Kafka
46
56
  "#{@host}:#{@port}"
47
57
  end
48
58
 
59
+ def close
60
+ @logger.debug "Closing socket to #{to_s}"
61
+ @socket.close
62
+ end
63
+
49
64
  # Sends a request over the connection.
50
65
  #
51
66
  # @param api_key [Integer] the integer code for the API that is invoked.
@@ -75,13 +90,18 @@ module Kafka
75
90
 
76
91
  message = Kafka::Protocol::RequestMessage.new(
77
92
  api_key: api_key,
78
- api_version: API_VERSION,
79
93
  correlation_id: @correlation_id,
80
94
  client_id: @client_id,
81
95
  request: request,
82
96
  )
83
97
 
84
98
  data = Kafka::Protocol::Encoder.encode_with(message)
99
+
100
+ unless IO.select(nil, [@socket], nil, @socket_timeout)
101
+ @logger.error "Timed out while writing request #{@correlation_id}"
102
+ raise ConnectionError
103
+ end
104
+
85
105
  @encoder.write_bytes(data)
86
106
 
87
107
  nil
@@ -96,6 +116,11 @@ module Kafka
96
116
  def read_response(response_class)
97
117
  @logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
98
118
 
119
+ unless IO.select([@socket], nil, nil, @socket_timeout)
120
+ @logger.error "Timed out while waiting for response #{@correlation_id}"
121
+ raise ConnectionError
122
+ end
123
+
99
124
  bytes = @decoder.bytes
100
125
 
101
126
  buffer = StringIO.new(bytes)
@@ -0,0 +1,13 @@
1
+ require "zlib"
2
+
3
+ module Kafka
4
+ class Partitioner
5
+ def initialize(partitions)
6
+ @partitions = partitions
7
+ end
8
+
9
+ def partition_for_key(key)
10
+ Zlib.crc32(key) % @partitions.count
11
+ end
12
+ end
13
+ end
@@ -1,13 +1,14 @@
1
1
  require "kafka/message"
2
2
  require "kafka/message_set"
3
+ require "kafka/partitioner"
3
4
 
4
5
  module Kafka
5
6
  class Producer
6
- # @param timeout [Integer] The number of milliseconds to wait for an
7
+ # @param timeout [Integer] The number of seconds to wait for an
7
8
  # acknowledgement from the broker before timing out.
8
9
  # @param required_acks [Integer] The number of replicas that must acknowledge
9
10
  # a write.
10
- def initialize(broker_pool:, logger:, timeout: 10_000, required_acks: 1)
11
+ def initialize(broker_pool:, logger:, timeout: 10, required_acks: 1)
11
12
  @broker_pool = broker_pool
12
13
  @logger = logger
13
14
  @required_acks = required_acks
@@ -15,10 +16,50 @@ module Kafka
15
16
  @buffered_messages = []
16
17
  end
17
18
 
18
- def write(value, key:, topic:, partition:)
19
- @buffered_messages << Message.new(value, key: key, topic: topic, partition: partition)
19
+ # Writes a message to the specified topic. Note that messages are buffered in
20
+ # the producer until {#flush} is called.
21
+ #
22
+ # == Partitioning
23
+ #
24
+ # There are several options for specifying the partition that the message should
25
+ # be written to. The simplest option is to not specify a partition or partition
26
+ # key, in which case the message key will be used to select one of the available
27
+ # partitions. You can also specify the `partition` parameter yourself. This
28
+ # requires you to know which partitions are available, however. Oftentimes the
29
+ # best option is to specify the `partition_key` parameter: messages with the
30
+ # same partition key will always be assigned to the same partition, as long as
31
+ # the number of partitions doesn't change.
32
+ #
33
+ # @param value [String] the message data.
34
+ # @param key [String] the message key.
35
+ # @param topic [String] the topic that the message should be written to.
36
+ # @param partition [Integer] the partition that the message should be written to.
37
+ # @param partition_key [String] the key that should be used to assign a partition.
38
+ #
39
+ # @return [Message] the message that was written.
40
+ def write(value, key:, topic:, partition: nil, partition_key: nil)
41
+ if partition.nil?
42
+ # If no explicit partition key is specified we use the message key instead.
43
+ partition_key ||= key
44
+ partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
45
+ partition = partitioner.partition_for_key(partition_key)
46
+ end
47
+
48
+ message = Message.new(value, key: key, topic: topic, partition: partition)
49
+
50
+ @buffered_messages << message
51
+
52
+ message
20
53
  end
21
54
 
55
+ # Flushes all messages to the Kafka brokers.
56
+ #
57
+ # Depending on the value of `required_acks` used when initializing the producer,
58
+ # this call may block until the specified number of replicas have acknowledged
59
+ # the writes. The `timeout` setting places an upper bound on the amount of time
60
+ # the call will block before failing.
61
+ #
62
+ # @return [nil]
22
63
  def flush
23
64
  messages_for_broker = {}
24
65
 
@@ -37,7 +78,7 @@ module Kafka
37
78
  response = broker.produce(
38
79
  messages_for_topics: message_set.to_h,
39
80
  required_acks: @required_acks,
40
- timeout: @timeout,
81
+ timeout: @timeout * 1000, # Kafka expects the timeout in milliseconds.
41
82
  )
42
83
 
43
84
  if response
@@ -50,6 +91,12 @@ module Kafka
50
91
  end
51
92
 
52
93
  @buffered_messages.clear
94
+
95
+ nil
96
+ end
97
+
98
+ def shutdown
99
+ @broker_pool.shutdown
53
100
  end
54
101
  end
55
102
  end
@@ -14,6 +14,7 @@ module Kafka
14
14
  when 5 then raise LeaderNotAvailable
15
15
  when 6 then raise NotLeaderForPartition
16
16
  when 7 then raise RequestTimedOut
17
+ when 9 then raise ReplicaNotAvailable
17
18
  else raise UnknownError, "Unknown error with code #{error_code}"
18
19
  end
19
20
  end
@@ -116,6 +116,11 @@ module Kafka
116
116
  @brokers.find {|broker| broker.node_id == node_id }
117
117
  end
118
118
 
119
+ def partitions_for(topic_name)
120
+ topic = @topics.find {|t| t.topic_name == topic_name }
121
+ topic.partitions
122
+ end
123
+
119
124
  # Decodes a MetadataResponse from a {Decoder} containing response data.
120
125
  #
121
126
  # @param decoder [Decoder]
@@ -1,7 +1,9 @@
1
1
  module Kafka
2
2
  module Protocol
3
3
  class RequestMessage
4
- def initialize(api_key:, api_version:, correlation_id:, client_id:, request:)
4
+ API_VERSION = 0
5
+
6
+ def initialize(api_key:, api_version: API_VERSION, correlation_id:, client_id:, request:)
5
7
  @api_key = api_key
6
8
  @api_version = api_version
7
9
  @correlation_id = correlation_id
@@ -1,3 +1,3 @@
1
1
  module Kafka
2
- VERSION = "0.1.0-alpha2"
2
+ VERSION = "0.1.0-beta1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.pre.alpha2
4
+ version: 0.1.0.pre.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-21 00:00:00.000000000 Z
11
+ date: 2016-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -78,6 +78,7 @@ files:
78
78
  - lib/kafka/connection.rb
79
79
  - lib/kafka/message.rb
80
80
  - lib/kafka/message_set.rb
81
+ - lib/kafka/partitioner.rb
81
82
  - lib/kafka/producer.rb
82
83
  - lib/kafka/protocol.rb
83
84
  - lib/kafka/protocol/decoder.rb