ruby-kafka 0.1.0.pre.beta2 → 0.1.0.pre.beta3
- checksums.yaml +4 -4
- data/Gemfile +0 -4
- data/README.md +12 -8
- data/kafka.gemspec +9 -1
- data/lib/kafka/broker_pool.rb +5 -0
- data/lib/kafka/client.rb +24 -10
- data/lib/kafka/producer.rb +20 -17
- data/lib/kafka/version.rb +1 -1
- metadata +34 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e1c4fdc58659935c4d1e3af22cf746fc88f9dacc
+  data.tar.gz: 23f8e2ba29251ff66499cad11d22d029cbb24d58
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 522eb1d15cedc44ce760a381a710c3bd4beda7f21adc6f30bf438837de2ae75be9dfc82a57417ee7d3d1ef99bb599d80ed92949c6d9b1633e46395e5898c3aa4
+  data.tar.gz: 49f0129fecb9724874895751fafcbe0da1f6f3434ffe31daacc5f69876e2dda68cd91f040f385a5620e90386effc0728cb943717be320f5e64da46194eaa708b
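These checksums can be used to verify a downloaded gem artifact. A minimal sketch with Ruby's standard library, assuming you have a local copy of the archive member (the file path here is illustrative):

```ruby
require "digest"

# The published SHA512 for data.tar.gz, from checksums.yaml above.
expected = "49f0129fecb9724874895751fafcbe0da1f6f3434ffe31daacc5f69876e2dda68cd91f040f385a5620e90386effc0728cb943717be320f5e64da46194eaa708b"

# Hash the local file and compare.
actual = Digest::SHA512.file("data.tar.gz").hexdigest

abort "checksum mismatch!" unless actual == expected
puts "checksum OK"
```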
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -25,23 +25,27 @@ Or install it yourself as:
 Currently, only the Producer API is supported. A Kafka 0.9 compatible Consumer API is on the roadmap.
 
 ```ruby
-#
+# A client must be initialized with at least one Kafka broker. Each client keeps
+# a separate pool of broker connections. Don't use the same client from more than
+# one thread.
 kafka = Kafka.new(
   seed_brokers: ["kafka1:9092", "kafka2:9092"],
   client_id: "my-app",
   logger: Logger.new($stderr),
 )
 
-#
-#
+# A producer buffers messages and sends them to the broker that is the leader of
+# the partition a given message is being produced to.
 producer = kafka.get_producer
 
-# `
-producer.
-producer.
+# `produce` will buffer the message in the producer.
+producer.produce("hello1", key: "x", topic: "test-messages", partition: 0)
+producer.produce("hello2", key: "y", topic: "test-messages", partition: 1)
 
-# `
-
+# `send_messages` will send the buffered messages to the cluster. Since messages
+# may be destined for different partitions, this could involve writing to more
+# than one Kafka broker.
+producer.send_messages
 ```
 
 ## Development
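The README example above uses the producer defaults. Below is a minimal sketch of the same flow with explicit delivery settings; it assumes only names that appear elsewhere in this changeset (`get_producer` forwards its options to `Producer#initialize`, which accepts `required_acks` and `ack_timeout`):

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(
  seed_brokers: ["kafka1:9092", "kafka2:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr),
)

# In the Kafka protocol, required_acks: -1 means all in-sync replicas
# must acknowledge the write; ack_timeout is in seconds.
producer = kafka.get_producer(required_acks: -1, ack_timeout: 5)

producer.produce("hello", key: "x", topic: "test-messages")
producer.send_messages
producer.shutdown
```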
data/kafka.gemspec
CHANGED
@@ -10,7 +10,13 @@ Gem::Specification.new do |spec|
   spec.email = ["daniel.schierbeck@gmail.com"]
 
   spec.summary = %q{A client library for the Kafka distributed commit log.}
-
+
+  spec.description = <<-DESC.gsub(/^ /, "").strip
+    A client library for the Kafka distributed commit log.
+
+    Currently, only the Producer API is implemented.
+  DESC
+
   spec.homepage = "https://github.com/zendesk/ruby-kafka"
   spec.license = "Apache License Version 2.0"
 
@@ -23,4 +29,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
   spec.add_development_dependency "pry"
+  spec.add_development_dependency "dotenv"
+  spec.add_development_dependency "docker-api"
 end
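An aside on the `<<-DESC.gsub(/^ /, "").strip` idiom: `^` matches at the start of every line in a Ruby regex, so the `gsub` strips the indentation the heredoc body inherits from the surrounding block, and `strip` trims the trailing newline. A standalone sketch of the effect (this page's rendering may have collapsed the exact number of spaces in the original pattern):

```ruby
description = <<-DESC.gsub(/^  /, "").strip
  A client library for the Kafka distributed commit log.

  Currently, only the Producer API is implemented.
DESC

puts description
# A client library for the Kafka distributed commit log.
#
# Currently, only the Producer API is implemented.
```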
data/lib/kafka/broker_pool.rb
CHANGED
@@ -15,6 +15,7 @@ module Kafka
     # @param seed_brokers [Array<String>]
     # @param client_id [String]
     # @param logger [Logger]
+    # @param socket_timeout [Integer, nil] see {Connection#initialize}.
     def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
       @client_id = client_id
       @logger = logger
@@ -45,6 +46,10 @@ module Kafka
       cluster_info.partitions_for(topic)
     end
 
+    def topics
+      cluster_info.topics.map(&:topic_name)
+    end
+
     def shutdown
       @brokers.each do |id, broker|
         @logger.info "Disconnecting broker #{id}"
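The new `topics` helper maps the metadata response's topic structs down to plain names. A toy illustration of that shape, with a hypothetical struct standing in for the real metadata type:

```ruby
# Hypothetical stand-in for the topic metadata entries the cluster returns.
TopicMetadata = Struct.new(:topic_name, :partitions)

cluster_topics = [
  TopicMetadata.new("test-messages", [0, 1]),
  TopicMetadata.new("audit-log", [0]),
]

# Mirrors BrokerPool#topics: keep only the names.
p cluster_topics.map(&:topic_name)
# => ["test-messages", "audit-log"]
```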
data/lib/kafka/client.rb
CHANGED
@@ -18,21 +18,35 @@ module Kafka
     #
     # @return [Client]
     def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
-      @seed_brokers = seed_brokers
-      @client_id = client_id
       @logger = logger
-
+
+      @broker_pool = BrokerPool.new(
+        seed_brokers: seed_brokers,
+        client_id: client_id,
+        logger: logger,
+        socket_timeout: socket_timeout,
+      )
     end
 
+    # Builds a new producer.
+    #
+    # +options+ are passed to {Producer#initialize}.
+    #
+    # @see Producer#initialize
+    # @return [Producer] the Kafka producer.
     def get_producer(**options)
-      broker_pool
-
-
-
-
-
+      Producer.new(broker_pool: @broker_pool, logger: @logger, **options)
+    end
+
+    # Lists all topics in the cluster.
+    #
+    # @return [Array<String>] the list of topic names.
+    def topics
+      @broker_pool.topics
+    end
 
-
+    def close
+      @broker_pool.shutdown
     end
   end
 end
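A short sketch of the two public additions, using only the API shown in this hunk (`topics` delegates to the broker pool's new metadata lookup; `close` tears the pool down). The topic names printed here are illustrative:

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(
  seed_brokers: ["kafka1:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr),
  socket_timeout: 10, # seconds; forwarded to the broker pool's connections.
)

# Fetches cluster metadata and returns the topic names,
# e.g. ["test-messages", "audit-log"].
p kafka.topics

# Disconnects every broker in the pool.
kafka.close
```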
data/lib/kafka/producer.rb
CHANGED
@@ -8,19 +8,19 @@ module Kafka
   #
   # == Buffering
   #
-  # The producer buffers pending messages until {#
+  # The producer buffers pending messages until {#send_messages} is called. Note that there is
   # a maximum buffer size (default is 1,000 messages) and writing messages after the
   # buffer has reached this size will result in a BufferOverflow exception. Make sure
-  # to periodically call {#
+  # to periodically call {#send_messages} or set +max_buffer_size+ to an appropriate value.
   #
   # Buffering messages and sending them in batches greatly improves performance, so
-  # try to avoid
+  # try to avoid sending messages after every write. The tradeoff between throughput and
   # message delays depends on your use case.
   #
   # == Error Handling and Retries
   #
   # The design of the error handling is based on having a {MessageBuffer} hold messages
-  # for all topics/partitions. Whenever we want to
+  # for all topics/partitions. Whenever we want to send messages to the cluster, we
   # group the buffered messages by the broker they need to be sent to and fire off a
   # request to each broker. A request can be a partial success, so we go through the
   # response and inspect the error code for each partition that we wrote to. If the
@@ -39,7 +39,7 @@ module Kafka
     #
     # @param logger [Logger]
     #
-    # @param
+    # @param ack_timeout [Integer] The number of seconds a broker can wait for
     #   replicas to acknowledge a write before responding with a timeout.
     #
     # @param required_acks [Integer] The number of replicas that must acknowledge
@@ -54,19 +54,19 @@ module Kafka
     # @param max_buffer_size [Integer] the number of messages allowed in the buffer
     #   before new writes will raise BufferOverflow exceptions.
     #
-    def initialize(broker_pool:, logger:,
+    def initialize(broker_pool:, logger:, ack_timeout: 10, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
       @broker_pool = broker_pool
       @logger = logger
       @required_acks = required_acks
-      @
+      @ack_timeout = ack_timeout
       @max_retries = max_retries
       @retry_backoff = retry_backoff
       @max_buffer_size = max_buffer_size
       @buffer = MessageBuffer.new
     end
 
-    #
-    # the producer until {#
+    # Produces a message to the specified topic. Note that messages are buffered in
+    # the producer until {#send_messages} is called.
     #
     # == Partitioning
     #
@@ -94,7 +94,7 @@ module Kafka
     #
     # @raise [BufferOverflow] if the maximum buffer size has been reached.
     # @return [nil]
-    def
+    def produce(value, key: nil, topic:, partition: nil, partition_key: nil)
       unless buffer_size < @max_buffer_size
         raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
       end
@@ -113,20 +113,20 @@ module Kafka
       partition
     end
 
-    #
+    # Sends all buffered messages to the Kafka brokers.
     #
     # Depending on the value of +required_acks+ used when initializing the producer,
     # this call may block until the specified number of replicas have acknowledged
-    # the writes. The +
-    # the call will block before failing.
+    # the writes. The +ack_timeout+ setting places an upper bound on the amount of
+    # time the call will block before failing.
     #
     # @raise [FailedToSendMessages] if not all messages could be successfully sent.
     # @return [nil]
-    def
+    def send_messages
       attempt = 0
 
       loop do
-        @logger.info "
+        @logger.info "Sending #{@buffer.size} messages"
 
         attempt += 1
         transmit_messages
@@ -166,6 +166,9 @@ module Kafka
       @buffer.size
     end
 
+    # Closes all connections to the brokers.
+    #
+    # @return [nil]
     def shutdown
       @broker_pool.shutdown
     end
@@ -191,7 +194,7 @@ module Kafka
         response = broker.produce(
           messages_for_topics: message_set.to_h,
           required_acks: @required_acks,
-          timeout: @
+          timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
         )
 
         handle_response(response) if response
@@ -229,7 +232,7 @@ module Kafka
           @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
         else
           offset = partition_info.offset
-          @logger.info "Successfully
+          @logger.info "Successfully sent messages for #{topic}/#{partition}; new offset is #{offset}"
 
           # The messages were successfully written; clear them from the buffer.
           @buffer.clear_messages(topic: topic, partition: partition)
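Taken together, the renamed methods give the buffering contract described in the docs above: `produce` only appends to the in-memory buffer, `send_messages` flushes it with up to `max_retries` attempts, and the two documented exceptions mark the failure modes. A sketch under those assumptions; the `events` array is hypothetical, and the exception classes are assumed to live under the `Kafka` module, as the YARD references inside `module Kafka` suggest:

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(
  seed_brokers: ["kafka1:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr),
)

producer = kafka.get_producer(
  required_acks: 1,   # wait for the partition leader only.
  ack_timeout: 10,    # seconds; multiplied by 1000 on the wire (see above).
  max_retries: 2,
  retry_backoff: 1,
  max_buffer_size: 1000,
)

events = ["event-1", "event-2"] # hypothetical message source.

events.each do |event|
  begin
    producer.produce(event, topic: "events")
  rescue Kafka::BufferOverflow
    # The buffer has reached max_buffer_size; flush, then retry the write.
    producer.send_messages
    retry
  end
end

# Flush whatever is left; raises Kafka::FailedToSendMessages if some
# messages could not be delivered within max_retries attempts.
producer.send_messages
producer.shutdown
```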
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.0.pre.beta2
+  version: 0.1.0.pre.beta3
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-01-
+date: 2016-01-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -66,8 +66,38 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-
-
+- !ruby/object:Gem::Dependency
+  name: dotenv
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: docker-api
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: |-
+  A client library for the Kafka distributed commit log.
+
+  Currently, only the Producer API is implemented.
 email:
 - daniel.schierbeck@gmail.com
 executables: []