ruby-kafka 0.1.0.pre.beta3 → 0.1.0.pre.beta4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/README.md +14 -2
- data/examples/simple-producer.rb +34 -0
- data/lib/kafka/broker_pool.rb +4 -2
- data/lib/kafka/client.rb +1 -1
- data/lib/kafka/connection.rb +2 -1
- data/lib/kafka/producer.rb +45 -9
- data/lib/kafka/protocol/encoder.rb +2 -0
- data/lib/kafka/protocol/message.rb +2 -1
- data/lib/kafka/protocol/metadata_response.rb +7 -2
- data/lib/kafka/protocol/produce_request.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71fcf73c1adb43253684cb0ef1406a0631c3ce3a
|
4
|
+
data.tar.gz: dde9f9123b31f20475c7d280532f0b135f6a84bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c44cc61c8b50ab3854830596bfd965837e03e1af97c3917c3ecf4386bab38534184688ae5dc1f4c284c8f2878143637e9dffdfe35e95c504a8f1381ca4da6e5
|
7
|
+
data.tar.gz: 06de9ac78b013cf448e03d0ba4ec879059bfbc04c6626c373e30c4c0c95c3b23282b30c3607df8955ab4c617d5d31d67c8ae80033381212baccf107aefda0489
|
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -39,8 +39,20 @@ kafka = Kafka.new(
|
|
39
39
|
producer = kafka.get_producer
|
40
40
|
|
41
41
|
# `produce` will buffer the message in the producer.
|
42
|
-
producer.produce("hello1",
|
43
|
-
|
42
|
+
producer.produce("hello1", topic: "test-messages")
|
43
|
+
|
44
|
+
# It's possible to specify a message key:
|
45
|
+
producer.produce("hello2", key: "x", topic: "test-messages")
|
46
|
+
|
47
|
+
# If you need to control which partition a message should be written to, you
|
48
|
+
# can pass in the `partition` parameter:
|
49
|
+
producer.produce("hello3", topic: "test-messages", partition: 1)
|
50
|
+
|
51
|
+
# If you don't know exactly how many partitions are in the topic, or you'd
|
52
|
+
# rather have some level of indirection, you can pass in `partition_key`.
|
53
|
+
# Two messages with the same partition key will always be written to the
|
54
|
+
# same partition.
|
55
|
+
producer.produce("hello4", topic: "test-messages", partition_key: "yo")
|
44
56
|
|
45
57
|
# `send_messages` will send the buffered messages to the cluster. Since messages
|
46
58
|
# may be destined for different partitions, this could involve writing to more
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Reads lines from STDIN, writing them to Kafka.
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
|
4
|
+
|
5
|
+
require "kafka"
|
6
|
+
|
7
|
+
logger = Logger.new($stderr)
|
8
|
+
brokers = ENV.fetch("KAFKA_BROKERS").split(",")
|
9
|
+
|
10
|
+
# Make sure to create this topic in your Kafka cluster or configure the
|
11
|
+
# cluster to auto-create topics.
|
12
|
+
topic = "random-messages"
|
13
|
+
|
14
|
+
kafka = Kafka.new(
|
15
|
+
seed_brokers: brokers,
|
16
|
+
client_id: "simple-producer",
|
17
|
+
logger: logger,
|
18
|
+
)
|
19
|
+
|
20
|
+
producer = kafka.get_producer
|
21
|
+
|
22
|
+
begin
|
23
|
+
$stdin.each_with_index do |line, index|
|
24
|
+
producer.produce(line, topic: topic)
|
25
|
+
|
26
|
+
# Send messages for every 10 lines.
|
27
|
+
producer.send_messages if index % 10 == 0
|
28
|
+
end
|
29
|
+
ensure
|
30
|
+
# Make sure to send any remaining messages.
|
31
|
+
producer.send_messages
|
32
|
+
|
33
|
+
producer.shutdown
|
34
|
+
end
|
data/lib/kafka/broker_pool.rb
CHANGED
@@ -66,10 +66,10 @@ module Kafka
|
|
66
66
|
# Fetches the cluster metadata.
|
67
67
|
#
|
68
68
|
# This is used to update the partition leadership information, among other things.
|
69
|
-
# The methods will go through each node listed in
|
69
|
+
# The methods will go through each node listed in `seed_brokers`, connecting to the
|
70
70
|
# first one that is available. This node will be queried for the cluster metadata.
|
71
71
|
#
|
72
|
-
# @raise [ConnectionError] if none of the nodes in
|
72
|
+
# @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
|
73
73
|
# @return [Protocol::MetadataResponse] the cluster metadata.
|
74
74
|
def fetch_cluster_info
|
75
75
|
@seed_brokers.each do |node|
|
@@ -93,6 +93,8 @@ module Kafka
|
|
93
93
|
return cluster_info
|
94
94
|
rescue Error => e
|
95
95
|
@logger.error "Failed to fetch metadata from #{node}: #{e}"
|
96
|
+
ensure
|
97
|
+
broker.disconnect unless broker.nil?
|
96
98
|
end
|
97
99
|
end
|
98
100
|
|
data/lib/kafka/client.rb
CHANGED
data/lib/kafka/connection.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "socket"
|
2
|
+
require "stringio"
|
2
3
|
require "kafka/protocol/request_message"
|
3
4
|
require "kafka/protocol/encoder"
|
4
5
|
require "kafka/protocol/decoder"
|
@@ -67,7 +68,7 @@ module Kafka
|
|
67
68
|
# @param request [#encode] the request that should be encoded and written.
|
68
69
|
# @param response_class [#decode] an object that can decode the response.
|
69
70
|
#
|
70
|
-
# @return [Object] the response that was decoded by
|
71
|
+
# @return [Object] the response that was decoded by `response_class`.
|
71
72
|
def request(api_key, request, response_class)
|
72
73
|
write_request(api_key, request)
|
73
74
|
|
data/lib/kafka/producer.rb
CHANGED
@@ -6,18 +6,18 @@ module Kafka
|
|
6
6
|
|
7
7
|
# Allows sending messages to a Kafka cluster.
|
8
8
|
#
|
9
|
-
#
|
9
|
+
# ## Buffering
|
10
10
|
#
|
11
11
|
# The producer buffers pending messages until {#send_messages} is called. Note that there is
|
12
12
|
# a maximum buffer size (default is 1,000 messages) and writing messages after the
|
13
13
|
# buffer has reached this size will result in a BufferOverflow exception. Make sure
|
14
|
-
# to periodically call {#send_messages} or set
|
14
|
+
# to periodically call {#send_messages} or set `max_buffer_size` to an appropriate value.
|
15
15
|
#
|
16
16
|
# Buffering messages and sending them in batches greatly improves performance, so
|
17
17
|
# try to avoid sending messages after every write. The tradeoff between throughput and
|
18
18
|
# message delays depends on your use case.
|
19
19
|
#
|
20
|
-
#
|
20
|
+
# ## Error Handling and Retries
|
21
21
|
#
|
22
22
|
# The design of the error handling is based on having a {MessageBuffer} hold messages
|
23
23
|
# for all topics/partitions. Whenever we want to send messages to the cluster, we
|
@@ -29,7 +29,43 @@ module Kafka
|
|
29
29
|
#
|
30
30
|
# After this, we check if the buffer is empty. If it is, we're all done. If it's
|
31
31
|
# not, we do another round of requests, this time with just the remaining messages.
|
32
|
-
# We do this for as long as
|
32
|
+
# We do this for as long as `max_retries` permits.
|
33
|
+
#
|
34
|
+
# ## Example
|
35
|
+
#
|
36
|
+
# This is an example of an application which reads lines from stdin and writes them
|
37
|
+
# to Kafka:
|
38
|
+
#
|
39
|
+
# require "kafka"
|
40
|
+
#
|
41
|
+
# logger = Logger.new($stderr)
|
42
|
+
# brokers = ENV.fetch("KAFKA_BROKERS").split(",")
|
43
|
+
#
|
44
|
+
# # Make sure to create this topic in your Kafka cluster or configure the
|
45
|
+
# # cluster to auto-create topics.
|
46
|
+
# topic = "random-messages"
|
47
|
+
#
|
48
|
+
# kafka = Kafka.new(
|
49
|
+
# seed_brokers: brokers,
|
50
|
+
# client_id: "simple-producer",
|
51
|
+
# logger: logger,
|
52
|
+
# )
|
53
|
+
#
|
54
|
+
# producer = kafka.get_producer
|
55
|
+
#
|
56
|
+
# begin
|
57
|
+
# $stdin.each_with_index do |line, index|
|
58
|
+
# producer.produce(line, topic: topic)
|
59
|
+
#
|
60
|
+
# # Send messages for every 10 lines.
|
61
|
+
# producer.send_messages if index % 10 == 0
|
62
|
+
# end
|
63
|
+
# ensure
|
64
|
+
# # Make sure to send any remaining messages.
|
65
|
+
# producer.send_messages
|
66
|
+
#
|
67
|
+
# producer.shutdown
|
68
|
+
# end
|
33
69
|
#
|
34
70
|
class Producer
|
35
71
|
|
@@ -68,7 +104,7 @@ module Kafka
|
|
68
104
|
# Produces a message to the specified topic. Note that messages are buffered in
|
69
105
|
# the producer until {#send_messages} is called.
|
70
106
|
#
|
71
|
-
#
|
107
|
+
# ## Partitioning
|
72
108
|
#
|
73
109
|
# There are several options for specifying the partition that the message should
|
74
110
|
# be written to.
|
@@ -77,9 +113,9 @@ module Kafka
|
|
77
113
|
# partition number, in which case the message will be assigned a partition at
|
78
114
|
# random.
|
79
115
|
#
|
80
|
-
# You can also specify the
|
116
|
+
# You can also specify the `partition` parameter yourself. This requires you to
|
81
117
|
# know which partitions are available, however. Oftentimes the best option is
|
82
|
-
# to specify the
|
118
|
+
# to specify the `partition_key` parameter: messages with the same partition
|
83
119
|
# key will always be assigned to the same partition, as long as the number of
|
84
120
|
# partitions doesn't change. You can also omit the partition key and specify
|
85
121
|
# a message key instead. The message key is part of the message payload, and
|
@@ -115,9 +151,9 @@ module Kafka
|
|
115
151
|
|
116
152
|
# Sends all buffered messages to the Kafka brokers.
|
117
153
|
#
|
118
|
-
# Depending on the value of
|
154
|
+
# Depending on the value of `required_acks` used when initializing the producer,
|
119
155
|
# this call may block until the specified number of replicas have acknowledged
|
120
|
-
# the writes. The
|
156
|
+
# the writes. The `ack_timeout` setting places an upper bound on the amount of
|
121
157
|
# time the call will block before failing.
|
122
158
|
#
|
123
159
|
# @raise [FailedToSendMessages] if not all messages could be successfully sent.
|
@@ -8,11 +8,11 @@ module Kafka
|
|
8
8
|
#
|
9
9
|
# * For each broker a node id, host, and port is provided.
|
10
10
|
# * For each topic partition the node id of the broker acting as partition leader,
|
11
|
-
# as well as a list of node ids for the set of replicas, are given. The
|
11
|
+
# as well as a list of node ids for the set of replicas, are given. The `isr` list is
|
12
12
|
# the subset of replicas that are "in sync", i.e. have fully caught up with the
|
13
13
|
# leader.
|
14
14
|
#
|
15
|
-
#
|
15
|
+
# ## API Specification
|
16
16
|
#
|
17
17
|
# MetadataResponse => [Broker][TopicMetadata]
|
18
18
|
# Broker => NodeId Host Port (any number of brokers may be returned)
|
@@ -118,6 +118,11 @@ module Kafka
|
|
118
118
|
|
119
119
|
def partitions_for(topic_name)
|
120
120
|
topic = @topics.find {|t| t.topic_name == topic_name }
|
121
|
+
|
122
|
+
if topic.nil?
|
123
|
+
raise UnknownTopicOrPartition, "unknown topic #{topic_name}"
|
124
|
+
end
|
125
|
+
|
121
126
|
topic.partitions
|
122
127
|
end
|
123
128
|
|
@@ -1,9 +1,11 @@
|
|
1
|
+
require "stringio"
|
2
|
+
|
1
3
|
module Kafka
|
2
4
|
module Protocol
|
3
5
|
|
4
6
|
# A produce request sends a message set to the server.
|
5
7
|
#
|
6
|
-
#
|
8
|
+
# ## API Specification
|
7
9
|
#
|
8
10
|
# ProduceRequest => RequiredAcks Timeout [TopicName [Partition MessageSetSize MessageSet]]
|
9
11
|
# RequiredAcks => int16
|
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.pre.beta3
|
4
|
+
version: 0.1.0.pre.beta4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01
|
11
|
+
date: 2016-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -106,6 +106,7 @@ extra_rdoc_files: []
|
|
106
106
|
files:
|
107
107
|
- ".gitignore"
|
108
108
|
- ".rspec"
|
109
|
+
- ".yardopts"
|
109
110
|
- Gemfile
|
110
111
|
- LICENSE.txt
|
111
112
|
- README.md
|
@@ -113,6 +114,7 @@ files:
|
|
113
114
|
- bin/console
|
114
115
|
- bin/setup
|
115
116
|
- circle.yml
|
117
|
+
- examples/simple-producer.rb
|
116
118
|
- kafka.gemspec
|
117
119
|
- lib/kafka.rb
|
118
120
|
- lib/kafka/broker.rb
|