ruby-kafka 0.1.3 → 0.1.4

This diff shows the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,130 @@
+require "set"
+require "kafka/broker_pool"
+
+module Kafka
+
+  # A cluster represents the state of a Kafka cluster. It needs to be initialized
+  # with a non-empty list of seed brokers. The first seed broker that the cluster can connect
+  # to will be asked for the cluster metadata, which allows the cluster to map topic
+  # partitions to the current leader for those partitions.
+  class Cluster
+
+    # Initializes a Cluster with a set of seed brokers.
+    #
+    # The cluster will try to fetch cluster metadata from one of the brokers.
+    #
+    # @param seed_brokers [Array<String>]
+    # @param broker_pool [Kafka::BrokerPool]
+    # @param logger [Logger]
+    def initialize(seed_brokers:, broker_pool:, logger:)
+      if seed_brokers.empty?
+        raise ArgumentError, "At least one seed broker must be configured"
+      end
+
+      @logger = logger
+      @seed_brokers = seed_brokers
+      @broker_pool = broker_pool
+      @cluster_info = nil
+      @stale = true
+
+      # This is the set of topics we need metadata for. If empty, metadata for
+      # all topics will be fetched.
+      @target_topics = Set.new
+    end
+
+    def add_target_topics(topics)
+      new_topics = Set.new(topics) - @target_topics
+
+      unless new_topics.empty?
+        @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
+
+        @target_topics.merge(new_topics)
+
+        refresh_metadata!
+      end
+    end
+
+    def mark_as_stale!
+      @stale = true
+    end
+
+    def refresh_metadata!
+      @cluster_info = nil
+      cluster_info
+    end
+
+    def refresh_metadata_if_necessary!
+      refresh_metadata! if @stale
+    end
+
+    # Finds the broker acting as the leader of the given topic and partition.
+    #
+    # @param topic [String]
+    # @param partition [Integer]
+    # @return [Broker] the broker that's currently leader.
+    def get_leader(topic, partition)
+      connect_to_broker(get_leader_id(topic, partition))
+    end
+
+    def partitions_for(topic)
+      cluster_info.partitions_for(topic)
+    end
+
+    def topics
+      cluster_info.topics.map(&:topic_name)
+    end
+
+    def disconnect
+      @broker_pool.close
+    end
+
+    private
+
+    def get_leader_id(topic, partition)
+      cluster_info.find_leader_id(topic, partition)
+    end
+
+    def cluster_info
+      @cluster_info ||= fetch_cluster_info
+    end
+
+    # Fetches the cluster metadata.
+    #
+    # This is used to update the partition leadership information, among other things.
+    # The method will go through each node listed in `seed_brokers`, connecting to the
+    # first one that is available. This node will be queried for the cluster metadata.
+    #
+    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
+    # @return [Protocol::MetadataResponse] the cluster metadata.
+    def fetch_cluster_info
+      @seed_brokers.each do |node|
+        @logger.info "Fetching cluster metadata from #{node}"
+
+        begin
+          host, port = node.split(":", 2)
+
+          broker = @broker_pool.connect(host, port.to_i)
+          cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+          @stale = false
+
+          @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+          return cluster_info
+        rescue Error => e
+          @logger.error "Failed to fetch metadata from #{node}: #{e}"
+        ensure
+          broker.disconnect unless broker.nil?
+        end
+      end
+
+      raise ConnectionError, "Could not connect to any of the seed brokers: #{@seed_brokers.join(', ')}"
+    end
+
+    def connect_to_broker(broker_id)
+      info = cluster_info.find_broker(broker_id)
+
+      @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+    end
+  end
+end
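
As a quick orientation for the new Cluster class above, here is a minimal usage sketch; it is not part of the diff, and the `broker_pool` and `logger` objects as well as the broker addresses are assumed for illustration:

    # Hedged sketch; `broker_pool` and `logger` are assumed to be built elsewhere.
    cluster = Kafka::Cluster.new(
      seed_brokers: ["kafka1:9092", "kafka2:9092"],
      broker_pool: broker_pool,
      logger: logger,
    )

    cluster.add_target_topics(["greetings"])    # restricts metadata fetches to this topic and forces a refresh
    leader = cluster.get_leader("greetings", 0) # broker currently leading partition 0
    cluster.disconnect                          # closes all connections in the pool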
@@ -88,8 +88,6 @@ module Kafka
         wait_for_response(response_class, notification) unless response_class.nil?
       end
     rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
-      @logger.error "Connection error: #{e}"
-
       close
 
       raise ConnectionError, "Connection error: #{e}"
@@ -98,7 +96,7 @@ module Kafka
     private
 
     def open
-      @logger.info "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
+      @logger.debug "Opening connection to #{@host}:#{@port} with client id #{@client_id}..."
 
       @socket = SocketWithTimeout.new(@host, @port, connect_timeout: @connect_timeout, timeout: @socket_timeout)
 
@@ -0,0 +1,127 @@
+module Kafka
+
+  # Fetches messages from one or more partitions.
+  #
+  #     operation = Kafka::FetchOperation.new(
+  #       cluster: cluster,
+  #       logger: logger,
+  #       min_bytes: 1,
+  #       max_wait_time: 10,
+  #     )
+  #
+  #     # These calls will schedule fetches from the specified topics/partitions.
+  #     operation.fetch_from_partition("greetings", 42, offset: :latest, max_bytes: 100000)
+  #     operation.fetch_from_partition("goodbyes", 13, offset: :latest, max_bytes: 100000)
+  #
+  #     operation.execute
+  #
+  class FetchOperation
+    def initialize(cluster:, logger:, min_bytes:, max_wait_time:)
+      @cluster = cluster
+      @logger = logger
+      @min_bytes = min_bytes
+      @max_wait_time = max_wait_time
+      @topics = {}
+    end
+
+    def fetch_from_partition(topic, partition, offset:, max_bytes:)
+      if offset == :earliest
+        offset = -2
+      elsif offset == :latest
+        offset = -1
+      end
+
+      @topics[topic] ||= {}
+      @topics[topic][partition] = {
+        fetch_offset: offset,
+        max_bytes: max_bytes,
+      }
+    end
+
+    def execute
+      @cluster.add_target_topics(@topics.keys)
+      @cluster.refresh_metadata_if_necessary!
+
+      topics_by_broker = {}
+
+      @topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          broker = @cluster.get_leader(topic, partition)
+
+          topics_by_broker[broker] ||= {}
+          topics_by_broker[broker][topic] ||= {}
+          topics_by_broker[broker][topic][partition] = options
+        end
+      end
+
+      topics_by_broker.flat_map {|broker, topics|
+        resolve_offsets(broker, topics)
+
+        options = {
+          max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
+          min_bytes: @min_bytes,
+          topics: topics,
+        }
+
+        response = broker.fetch_messages(**options)
+
+        response.topics.flat_map {|fetched_topic|
+          fetched_topic.partitions.flat_map {|fetched_partition|
+            Protocol.handle_error(fetched_partition.error_code)
+
+            fetched_partition.messages.map {|offset, message|
+              FetchedMessage.new(
+                value: message.value,
+                key: message.key,
+                topic: fetched_topic.name,
+                partition: fetched_partition.partition,
+                offset: offset,
+              )
+            }
+          }
+        }
+      }
+    rescue Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
+      @cluster.mark_as_stale!
+
+      raise
+    end
+
+    private
+
+    def resolve_offsets(broker, topics)
+      pending_topics = {}
+
+      topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          offset = options.fetch(:fetch_offset)
+          next if offset >= 0
+
+          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
+
+          pending_topics[topic] ||= []
+          pending_topics[topic] << {
+            partition: partition,
+            time: offset,
+            max_offsets: 1,
+          }
+        end
+      end
+
+      return topics if pending_topics.empty?
+
+      response = broker.list_offsets(topics: pending_topics)
+
+      pending_topics.each do |topic, partitions|
+        partitions.each do |options|
+          partition = options.fetch(:partition)
+          resolved_offset = response.offset_for(topic, partition)
+
+          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
+
+          topics[topic][partition][:fetch_offset] = resolved_offset || 0
+        end
+      end
+    end
+  end
+end
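
Building on the usage example in the FetchOperation class comment, `execute` returns the fetched messages as a flat array of Kafka::FetchedMessage objects. A hedged sketch, assuming `cluster` and `logger` already exist:

    operation = Kafka::FetchOperation.new(cluster: cluster, logger: logger, min_bytes: 1, max_wait_time: 10)

    # :earliest maps to -2 and :latest to -1; negative offsets are resolved to real
    # offsets via a list_offsets request before the fetch is issued.
    operation.fetch_from_partition("greetings", 42, offset: :earliest, max_bytes: 100_000)

    operation.execute.each do |message|
      puts "#{message.topic}/#{message.partition} @ #{message.offset}: #{message.value}"
    end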
@@ -0,0 +1,27 @@
+module Kafka
+  class FetchedMessage
+
+    # @return [String] the value of the message.
+    attr_reader :value
+
+    # @return [String] the key of the message.
+    attr_reader :key
+
+    # @return [String] the name of the topic that the message was written to.
+    attr_reader :topic
+
+    # @return [Integer] the partition number that the message was written to.
+    attr_reader :partition
+
+    # @return [Integer] the offset of the message in the partition.
+    attr_reader :offset
+
+    def initialize(value:, key:, topic:, partition:, offset:)
+      @value = value
+      @key = key
+      @topic = topic
+      @partition = partition
+      @offset = offset
+    end
+  end
+end
@@ -6,7 +6,7 @@ module Kafka
   end
 
   if defined?(ActiveSupport::Notifications)
-    Instrumentation = ActiveSupport::Notifications
+    Instrumentation = ActiveSupport::Notifications
   else
     Instrumentation = NullInstrumentation
   end
@@ -1,3 +1,5 @@
+require "kafka/protocol/message"
+
 module Kafka
 
   # Buffers messages for specific topics/partitions.
@@ -11,8 +13,9 @@ module Kafka
       @size = 0
     end
 
-    def write(message, topic:, partition:)
+    def write(value:, key:, topic:, partition:)
       @size += 1
+      message = Protocol::Message.new(key: key, value: value)
       buffer_for(topic, partition) << message
     end
 
@@ -50,6 +53,10 @@ module Kafka
       @buffer.delete(topic) if @buffer[topic].empty?
     end
 
+    def message_count_for_partition(topic:, partition:)
+      buffer_for(topic, partition).count
+    end
+
     # Clears messages across all topics and partitions.
     #
     # @return [nil]
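
The `write` signature change above means callers now pass the raw key and value instead of a pre-built Protocol::Message; the buffer wraps them itself. A hedged sketch of the old and new calls, assuming a buffer has been constructed elsewhere:

    # 0.1.3: buffer.write(Kafka::Protocol::Message.new(key: "hi", value: "world"), topic: "greetings", partition: 0)
    # 0.1.4:
    buffer.write(value: "world", key: "hi", topic: "greetings", partition: 0)

    # The new helper reports how many messages are buffered for a single partition:
    buffer.message_count_for_partition(topic: "greetings", partition: 0) #=> 1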
@@ -4,24 +4,29 @@ module Kafka
 
   # Assigns partitions to messages.
   class Partitioner
-    def initialize(partitions)
-      @partitions = partitions
-    end
 
-    # Assigns a partition number based on a key.
+    # Assigns a partition number based on a partition key. If no explicit
+    # partition key is provided, the message key will be used instead.
     #
     # If the key is nil, then a random partition is selected. Otherwise, a digest
     # of the key is used to deterministically find a partition. As long as the
     # number of partitions doesn't change, the same key will always be assigned
     # to the same partition.
     #
-    # @param key [String, nil] the key to base the partition assignment on, or nil.
+    # @param partition_count [Integer] the number of partitions in the topic.
+    # @param message [Kafka::PendingMessage] the message that should be assigned
+    #   a partition.
     # @return [Integer] the partition number.
-    def partition_for_key(key)
+    def self.partition_for_key(partition_count, message)
+      raise ArgumentError if partition_count == 0
+
+      # If no explicit partition key is specified we use the message key instead.
+      key = message.partition_key || message.key
+
       if key.nil?
-        rand(@partitions.count)
+        rand(partition_count)
       else
-        Zlib.crc32(key) % @partitions.count
+        Zlib.crc32(key) % partition_count
       end
     end
   end
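
Partitioner thus changes from an instance holding a partition list to a stateless class method that takes the partition count and a pending message (see the PendingMessage class added below), preferring `partition_key` over `key`. A hedged sketch of the new call, with illustrative values:

    message = Kafka::PendingMessage.new(
      value: "hello",
      key: "greeting",
      topic: "greetings",
      partition: nil,
      partition_key: "user-42",
    )

    # Deterministic for a fixed partition count: Zlib.crc32("user-42") % 8
    partition = Kafka::Partitioner.partition_for_key(8, message)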
@@ -0,0 +1,13 @@
+module Kafka
+  class PendingMessage
+    attr_reader :value, :key, :topic, :partition, :partition_key
+
+    def initialize(value:, key:, topic:, partition:, partition_key:)
+      @key = key
+      @value = value
+      @topic = topic
+      @partition = partition
+      @partition_key = partition_key
+    end
+  end
+end
@@ -0,0 +1,116 @@
+module Kafka
+  # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
+  # Since topics and partitions are spread among all brokers in a cluster, this usually
+  # involves sending requests to several or all of the brokers.
+  #
+  # ## Instrumentation
+  #
+  # When executing the operation, an `append_message_set.kafka` notification will be
+  # emitted for each message set that was successfully appended to a topic partition.
+  # The following keys will be found in the payload:
+  #
+  # * `:topic` — the topic that was written to.
+  # * `:partition` — the partition that the message set was appended to.
+  # * `:offset` — the offset of the first message in the message set.
+  # * `:message_count` — the number of messages that were appended.
+  #
+  # If there was an error appending the message set, the key `:exception` will be set
+  # in the payload. In that case, the message set will most likely not have been
+  # appended and will possibly be retried later. Check this key before reporting the
+  # operation as successful.
+  #
+  class ProduceOperation
+    def initialize(cluster:, buffer:, required_acks:, ack_timeout:, logger:)
+      @cluster = cluster
+      @buffer = buffer
+      @required_acks = required_acks
+      @ack_timeout = ack_timeout
+      @logger = logger
+    end
+
+    def execute
+      messages_for_broker = {}
+
+      @buffer.each do |topic, partition, messages|
+        begin
+          broker = @cluster.get_leader(topic, partition)
+
+          @logger.debug "Current leader for #{topic}/#{partition} is node #{broker}"
+
+          messages_for_broker[broker] ||= MessageBuffer.new
+          messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
+        rescue Kafka::Error => e
+          @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e}"
+
+          # We can't send the messages right now, so we'll just keep them in the buffer.
+          # We'll mark the cluster as stale in order to force a metadata refresh.
+          @cluster.mark_as_stale!
+        end
+      end
+
+      messages_for_broker.each do |broker, message_set|
+        begin
+          @logger.info "Sending #{message_set.size} messages to #{broker}"
+
+          response = broker.produce(
+            messages_for_topics: message_set.to_h,
+            required_acks: @required_acks,
+            timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
+          )
+
+          handle_response(response) if response
+        rescue ConnectionError => e
+          @logger.error "Could not connect to broker #{broker}: #{e}"
+
+          # Mark the cluster as stale in order to force a cluster metadata refresh.
+          @cluster.mark_as_stale!
+        end
+      end
+    end
+
+    private
+
+    def handle_response(response)
+      response.each_partition do |topic_info, partition_info|
+        topic = topic_info.topic
+        partition = partition_info.partition
+        offset = partition_info.offset
+        message_count = @buffer.message_count_for_partition(topic: topic, partition: partition)
+
+        begin
+          payload = {
+            topic: topic,
+            partition: partition,
+            offset: offset,
+            message_count: message_count,
+          }
+
+          Instrumentation.instrument("append_message_set.kafka", payload) do
+            Protocol.handle_error(partition_info.error_code)
+          end
+        rescue Kafka::CorruptMessage
+          @logger.error "Corrupt message when writing to #{topic}/#{partition}"
+        rescue Kafka::UnknownTopicOrPartition
+          @logger.error "Unknown topic or partition #{topic}/#{partition}"
+        rescue Kafka::LeaderNotAvailable
+          @logger.error "Leader currently not available for #{topic}/#{partition}"
+          @cluster.mark_as_stale!
+        rescue Kafka::NotLeaderForPartition
+          @logger.error "Broker not currently leader for #{topic}/#{partition}"
+          @cluster.mark_as_stale!
+        rescue Kafka::RequestTimedOut
+          @logger.error "Timed out while writing to #{topic}/#{partition}"
+        rescue Kafka::NotEnoughReplicas
+          @logger.error "Not enough in-sync replicas for #{topic}/#{partition}"
+        rescue Kafka::NotEnoughReplicasAfterAppend
+          @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
+        else
+          @logger.debug "Successfully appended #{message_count} messages to #{topic}/#{partition} at offset #{offset}"
+
+          # The messages were successfully written; clear them from the buffer.
+          @buffer.clear_messages(topic: topic, partition: partition)
+        end
+      end
+    end
+  end
+end
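
The `append_message_set.kafka` notification described in the ProduceOperation comment can be consumed with ActiveSupport::Notifications when it is loaded, which is what the Instrumentation constant resolves to in that case. A hedged subscriber sketch; the log messages are illustrative, not part of the library:

    ActiveSupport::Notifications.subscribe("append_message_set.kafka") do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      payload = event.payload

      if payload.key?(:exception)
        # The message set was most likely not appended and may be retried later.
        warn "produce to #{payload[:topic]}/#{payload[:partition]} failed"
      else
        puts "appended #{payload[:message_count]} messages at offset #{payload[:offset]}"
      end
    end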