ruby-kafka 0.1.0.pre.beta3 → 0.1.0.pre.beta4
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/README.md +14 -2
- data/examples/simple-producer.rb +34 -0
- data/lib/kafka/broker_pool.rb +4 -2
- data/lib/kafka/client.rb +1 -1
- data/lib/kafka/connection.rb +2 -1
- data/lib/kafka/producer.rb +45 -9
- data/lib/kafka/protocol/encoder.rb +2 -0
- data/lib/kafka/protocol/message.rb +2 -1
- data/lib/kafka/protocol/metadata_response.rb +7 -2
- data/lib/kafka/protocol/produce_request.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 71fcf73c1adb43253684cb0ef1406a0631c3ce3a
+  data.tar.gz: dde9f9123b31f20475c7d280532f0b135f6a84bf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c44cc61c8b50ab3854830596bfd965837e03e1af97c3917c3ecf4386bab38534184688ae5dc1f4c284c8f2878143637e9dffdfe35e95c504a8f1381ca4da6e5
+  data.tar.gz: 06de9ac78b013cf448e03d0ba4ec879059bfbc04c6626c373e30c4c0c95c3b23282b30c3607df8955ab4c617d5d31d67c8ae80033381212baccf107aefda0489
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -39,8 +39,20 @@ kafka = Kafka.new(
 producer = kafka.get_producer
 
 # `produce` will buffer the message in the producer.
-producer.produce("hello1",
-
+producer.produce("hello1", topic: "test-messages")
+
+# It's possible to specify a message key:
+producer.produce("hello2", key: "x", topic: "test-messages")
+
+# If you need to control which partition a message should be written to, you
+# can pass in the `partition` parameter:
+producer.produce("hello3", topic: "test-messages", partition: 1)
+
+# If you don't know exactly how many partitions are in the topic, or you'd
+# rather have some level of indirection, you can pass in `partition_key`.
+# Two messages with the same partition key will always be written to the
+# same partition.
+producer.produce("hello4", topic: "test-messages", partition_key: "yo")
 
 # `send_messages` will send the buffered messages to the cluster. Since messages
 # may be destined for different partitions, this could involve writing to more
data/examples/simple-producer.rb
ADDED
@@ -0,0 +1,34 @@
+# Reads lines from STDIN, writing them to Kafka.
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+
+logger = Logger.new($stderr)
+brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+
+# Make sure to create this topic in your Kafka cluster or configure the
+# cluster to auto-create topics.
+topic = "random-messages"
+
+kafka = Kafka.new(
+  seed_brokers: brokers,
+  client_id: "simple-producer",
+  logger: logger,
+)
+
+producer = kafka.get_producer
+
+begin
+  $stdin.each_with_index do |line, index|
+    producer.produce(line, topic: topic)
+
+    # Send messages for every 10 lines.
+    producer.send_messages if index % 10 == 0
+  end
+ensure
+  # Make sure to send any remaining messages.
+  producer.send_messages
+
+  producer.shutdown
+end
data/lib/kafka/broker_pool.rb
CHANGED
@@ -66,10 +66,10 @@ module Kafka
     # Fetches the cluster metadata.
     #
     # This is used to update the partition leadership information, among other things.
-    # The methods will go through each node listed in
+    # The methods will go through each node listed in `seed_brokers`, connecting to the
     # first one that is available. This node will be queried for the cluster metadata.
     #
-    # @raise [ConnectionError] if none of the nodes in
+    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
     # @return [Protocol::MetadataResponse] the cluster metadata.
     def fetch_cluster_info
       @seed_brokers.each do |node|
@@ -93,6 +93,8 @@ module Kafka
         return cluster_info
       rescue Error => e
         @logger.error "Failed to fetch metadata from #{node}: #{e}"
+      ensure
+        broker.disconnect unless broker.nil?
       end
     end
 
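The new `ensure` clause closes each broker connection even when the metadata request raises, so probing the seed broker list cannot leak sockets. A minimal sketch of the same resource-cleanup pattern in plain Ruby (`FlakyBroker` is a hypothetical stand-in, not part of the library):

    class FlakyBroker
      def fetch_metadata
        raise "connection reset"
      end

      def disconnect
        puts "socket closed"
      end
    end

    broker = nil
    begin
      broker = FlakyBroker.new
      broker.fetch_metadata
    rescue => e
      puts "Failed to fetch metadata: #{e}"
    ensure
      # Runs whether fetch_metadata succeeded or raised.
      broker.disconnect unless broker.nil?
    end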
data/lib/kafka/client.rb
CHANGED
data/lib/kafka/connection.rb
CHANGED
@@ -1,4 +1,5 @@
 require "socket"
+require "stringio"
 require "kafka/protocol/request_message"
 require "kafka/protocol/encoder"
 require "kafka/protocol/decoder"
@@ -67,7 +68,7 @@ module Kafka
     # @param request [#encode] the request that should be encoded and written.
     # @param response_class [#decode] an object that can decode the response.
     #
-    # @return [Object] the response that was decoded by
+    # @return [Object] the response that was decoded by `response_class`.
     def request(api_key, request, response_class)
       write_request(api_key, request)
 
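As the `[#decode]` type annotation indicates, `request` is duck-typed: `response_class` only needs a `decode` method, so test doubles work without subclassing anything. A hedged sketch of that contract (`NullResponse` is hypothetical, not a library class):

    # Any object responding to .decode satisfies the response_class contract.
    class NullResponse
      def self.decode(decoder)
        # A real response class would read its fields off the decoder here.
        new
      end
    end

    NullResponse.decode(nil)  # => #<NullResponse>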
data/lib/kafka/producer.rb
CHANGED
@@ -6,18 +6,18 @@ module Kafka
 
   # Allows sending messages to a Kafka cluster.
   #
-  #
+  # ## Buffering
   #
   # The producer buffers pending messages until {#send_messages} is called. Note that there is
   # a maximum buffer size (default is 1,000 messages) and writing messages after the
   # buffer has reached this size will result in a BufferOverflow exception. Make sure
-  # to periodically call {#send_messages} or set
+  # to periodically call {#send_messages} or set `max_buffer_size` to an appropriate value.
   #
   # Buffering messages and sending them in batches greatly improves performance, so
   # try to avoid sending messages after every write. The tradeoff between throughput and
   # message delays depends on your use case.
   #
-  #
+  # ## Error Handling and Retries
   #
   # The design of the error handling is based on having a {MessageBuffer} hold messages
   # for all topics/partitions. Whenever we want to send messages to the cluster, we
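To stay under the buffer ceiling, either flush with `send_messages` before the buffer fills or raise the limit up front. A short sketch, assuming `get_producer` accepts the `max_buffer_size` keyword that the comment above refers to:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "buffer-example")
    producer = kafka.get_producer(max_buffer_size: 10_000)

    1.upto(5_000) do |n|
      producer.produce("event ##{n}", topic: "events")

      # Flush periodically so the buffer never reaches max_buffer_size.
      producer.send_messages if n % 1_000 == 0
    end

    producer.send_messages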
@@ -29,7 +29,43 @@ module Kafka
   #
   # After this, we check if the buffer is empty. If it is, we're all done. If it's
   # not, we do another round of requests, this time with just the remaining messages.
-  # We do this for as long as
+  # We do this for as long as `max_retries` permits.
+  #
+  # ## Example
+  #
+  # This is an example of an application which reads lines from stdin and writes them
+  # to Kafka:
+  #
+  #     require "kafka"
+  #
+  #     logger = Logger.new($stderr)
+  #     brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+  #
+  #     # Make sure to create this topic in your Kafka cluster or configure the
+  #     # cluster to auto-create topics.
+  #     topic = "random-messages"
+  #
+  #     kafka = Kafka.new(
+  #       seed_brokers: brokers,
+  #       client_id: "simple-producer",
+  #       logger: logger,
+  #     )
+  #
+  #     producer = kafka.get_producer
+  #
+  #     begin
+  #       $stdin.each_with_index do |line, index|
+  #         producer.produce(line, topic: topic)
+  #
+  #         # Send messages for every 10 lines.
+  #         producer.send_messages if index % 10 == 0
+  #       end
+  #     ensure
+  #       # Make sure to send any remaining messages.
+  #       producer.send_messages
+  #
+  #       producer.shutdown
+  #     end
+  #
   class Producer
 
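When the retry rounds described above are exhausted and messages remain in the buffer, the producer gives up and raises. A hedged sketch of handling that failure, assuming the producer honors a `max_retries` keyword and that the `FailedToSendMessages` error named later in this file lives under the `Kafka` namespace:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "retry-example")
    producer = kafka.get_producer(max_retries: 3)

    producer.produce("important event", topic: "events")

    begin
      producer.send_messages
    rescue Kafka::FailedToSendMessages => e
      # Some messages were still unsent after the final retry round.
      $stderr.puts "Giving up: #{e}"
    end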
@@ -68,7 +104,7 @@ module Kafka
     # Produces a message to the specified topic. Note that messages are buffered in
     # the producer until {#send_messages} is called.
     #
-    #
+    # ## Partitioning
     #
     # There are several options for specifying the partition that the message should
     # be written to.
@@ -77,9 +113,9 @@ module Kafka
     # partition number, in which case the message will be assigned a partition at
     # random.
     #
-    # You can also specify the
+    # You can also specify the `partition` parameter yourself. This requires you to
     # know which partitions are available, however. Oftentimes the best option is
-    # to specify the
+    # to specify the `partition_key` parameter: messages with the same partition
     # key will always be assigned to the same partition, as long as the number of
     # partitions doesn't change. You can also omit the partition key and specify
     # a message key instead. The message key is part of the message payload, and
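The guarantee that equal partition keys map to the same partition is typically achieved by hashing the key modulo the partition count. The library's actual partitioner isn't shown in this diff; the following is an illustrative stand-in only:

    require "zlib"

    # Illustrative only: one common way to map a partition key to a partition.
    def partition_for(partition_key, num_partitions)
      Zlib.crc32(partition_key) % num_partitions
    end

    # Stable as long as the number of partitions stays at 4.
    partition_for("yo", 4)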
@@ -115,9 +151,9 @@ module Kafka
 
     # Sends all buffered messages to the Kafka brokers.
     #
-    # Depending on the value of
+    # Depending on the value of `required_acks` used when initializing the producer,
     # this call may block until the specified number of replicas have acknowledged
-    # the writes. The
+    # the writes. The `ack_timeout` setting places an upper bound on the amount of
     # time the call will block before failing.
     #
     # @raise [FailedToSendMessages] if not all messages could be successfully sent.
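Durability and latency trade off through these two settings. A hedged sketch, assuming `get_producer` forwards `required_acks` and `ack_timeout` keywords to the producer:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "acks-example")

    # required_acks: -1 waits for all in-sync replicas; ack_timeout caps the wait.
    producer = kafka.get_producer(required_acks: -1, ack_timeout: 10)

    producer.produce("durable event", topic: "events")
    producer.send_messages  # may block up to ack_timeout per request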
data/lib/kafka/protocol/metadata_response.rb
CHANGED
@@ -8,11 +8,11 @@ module Kafka
    #
    # * For each broker a node id, host, and port is provided.
    # * For each topic partition the node id of the broker acting as partition leader,
-    #   as well as a list of node ids for the set of replicas, are given. The
+    #   as well as a list of node ids for the set of replicas, are given. The `isr` list is
    #   the subset of replicas that are "in sync", i.e. have fully caught up with the
    #   leader.
    #
-    #
+    # ## API Specification
    #
    #     MetadataResponse => [Broker][TopicMetadata]
    #       Broker => NodeId Host Port  (any number of brokers may be returned)
@@ -118,6 +118,11 @@ module Kafka
 
      def partitions_for(topic_name)
        topic = @topics.find {|t| t.topic_name == topic_name }
+
+        if topic.nil?
+          raise UnknownTopicOrPartition, "unknown topic #{topic_name}"
+        end
+
        topic.partitions
      end
 
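With this change, asking for an unknown topic raises a descriptive `UnknownTopicOrPartition` instead of a `NoMethodError` on `nil`. A sketch of rescuing it at the call site, assuming the error lives under the `Kafka` namespace:

    # `cluster_info` stands in for a decoded metadata response (hypothetical here).
    begin
      partitions = cluster_info.partitions_for("no-such-topic")
    rescue Kafka::UnknownTopicOrPartition => e
      $stderr.puts "Topic is missing: #{e.message}"
    end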
data/lib/kafka/protocol/produce_request.rb
CHANGED
@@ -1,9 +1,11 @@
+require "stringio"
+
 module Kafka
   module Protocol
 
    # A produce request sends a message set to the server.
    #
-    #
+    # ## API Specification
    #
    #     ProduceRequest => RequiredAcks Timeout [TopicName [Partition MessageSetSize MessageSet]]
    #       RequiredAcks => int16
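The new `require "stringio"` suggests the request renders its message set into an in-memory buffer, which makes it possible to write the set's byte size (`MessageSetSize`) ahead of the set itself. A generic sketch of that size-prefix pattern, not the library's actual encoder API:

    require "stringio"

    buffer = StringIO.new
    buffer.write("pretend these are encoded messages")

    payload = buffer.string
    # int32 big-endian size prefix followed by the payload, as the wire format requires.
    framed = [payload.bytesize].pack("N") + payload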
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.0.pre.beta3
+  version: 0.1.0.pre.beta4
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-01
+date: 2016-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -106,6 +106,7 @@ extra_rdoc_files: []
 files:
 - ".gitignore"
 - ".rspec"
+- ".yardopts"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -113,6 +114,7 @@ files:
 - bin/console
 - bin/setup
 - circle.yml
+- examples/simple-producer.rb
 - kafka.gemspec
 - lib/kafka.rb
 - lib/kafka/broker.rb