ruby-kafka 0.1.0.pre.beta2 → 0.1.0.pre.beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -4
- data/README.md +12 -8
- data/kafka.gemspec +9 -1
- data/lib/kafka/broker_pool.rb +5 -0
- data/lib/kafka/client.rb +24 -10
- data/lib/kafka/producer.rb +20 -17
- data/lib/kafka/version.rb +1 -1
- metadata +34 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e1c4fdc58659935c4d1e3af22cf746fc88f9dacc
+  data.tar.gz: 23f8e2ba29251ff66499cad11d22d029cbb24d58
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 522eb1d15cedc44ce760a381a710c3bd4beda7f21adc6f30bf438837de2ae75be9dfc82a57417ee7d3d1ef99bb599d80ed92949c6d9b1633e46395e5898c3aa4
+  data.tar.gz: 49f0129fecb9724874895751fafcbe0da1f6f3434ffe31daacc5f69876e2dda68cd91f040f385a5620e90386effc0728cb943717be320f5e64da46194eaa708b
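These digests cover the two archives inside the gem package. As a quick sanity check outside of RubyGems, the published SHA512 for data.tar.gz can be recomputed with Ruby's standard library; a minimal sketch (the local file path is an assumption):

```ruby
require "digest"

# Path to a locally extracted data.tar.gz from the downloaded gem (assumed).
path = "data.tar.gz"

# The SHA512 digest published in checksums.yaml above.
expected = "49f0129fecb9724874895751fafcbe0da1f6f3434ffe31daacc5f69876e2dda68cd91f040f385a5620e90386effc0728cb943717be320f5e64da46194eaa708b"

actual = Digest::SHA512.file(path).hexdigest
abort "Checksum mismatch: got #{actual}" unless actual == expected
puts "Checksum OK"
```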
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -25,23 +25,27 @@ Or install it yourself as:
 Currently, only the Producer API is supported. A Kafka 0.9 compatible Consumer API is on the roadmap.
 
 ```ruby
-#
+# A client must be initialized with at least one Kafka broker. Each client keeps
+# a separate pool of broker connections. Don't use the same client from more than
+# one thread.
 kafka = Kafka.new(
   seed_brokers: ["kafka1:9092", "kafka2:9092"],
   client_id: "my-app",
   logger: Logger.new($stderr),
 )
 
-#
-#
+# A producer buffers messages and sends them to the broker that is the leader of
+# the partition a given message is being produced to.
 producer = kafka.get_producer
 
-# `
-producer.
-producer.
+# `produce` will buffer the message in the producer.
+producer.produce("hello1", key: "x", topic: "test-messages", partition: 0)
+producer.produce("hello2", key: "y", topic: "test-messages", partition: 1)
 
-# `
-
+# `send_messages` will send the buffered messages to the cluster. Since messages
+# may be destined for different partitions, this could involve writing to more
+# than one Kafka broker.
+producer.send_messages
 ```
 
 ## Development
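The README's warning not to share a client across threads suggests a one-client-per-thread pattern. A minimal sketch under that assumption, built only from the API shown in this diff (broker address and topic are illustrative):

```ruby
require "kafka"
require "logger"

threads = 2.times.map do |i|
  Thread.new do
    # Each thread builds its own client, and therefore its own pool of
    # broker connections.
    kafka = Kafka.new(
      seed_brokers: ["kafka1:9092"],
      client_id: "my-app-worker-#{i}",
      logger: Logger.new($stderr),
    )

    begin
      producer = kafka.get_producer
      producer.produce("hello from worker #{i}", topic: "test-messages")
      producer.send_messages
    ensure
      kafka.close
    end
  end
end

threads.each(&:join)
```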
data/kafka.gemspec
CHANGED
@@ -10,7 +10,13 @@ Gem::Specification.new do |spec|
   spec.email = ["daniel.schierbeck@gmail.com"]
 
   spec.summary = %q{A client library for the Kafka distributed commit log.}
-
+
+  spec.description = <<-DESC.gsub(/^ /, "").strip
+    A client library for the Kafka distributed commit log.
+
+    Currently, only the Producer API is implemented.
+  DESC
+
   spec.homepage = "https://github.com/zendesk/ruby-kafka"
   spec.license = "Apache License Version 2.0"
 
@@ -23,4 +29,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
   spec.add_development_dependency "pry"
+  spec.add_development_dependency "dotenv"
+  spec.add_development_dependency "docker-api"
 end
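The description is built with a heredoc cleanup idiom: gsub strips the leading indentation from each line and strip trims surrounding whitespace. A standalone sketch of the same idiom (the indent width here mirrors the /^ / pattern exactly as shown in the diff, which may have been truncated during extraction):

```ruby
description = <<-DESC.gsub(/^ /, "").strip
 A client library for the Kafka distributed commit log.

 Currently, only the Producer API is implemented.
DESC

puts description
# Output:
#   A client library for the Kafka distributed commit log.
#
#   Currently, only the Producer API is implemented.
```

The result matches the description: |- block that appears in the gem metadata diff below.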
data/lib/kafka/broker_pool.rb
CHANGED
@@ -15,6 +15,7 @@ module Kafka
     # @param seed_brokers [Array<String>]
     # @param client_id [String]
     # @param logger [Logger]
+    # @param socket_timeout [Integer, nil] see {Connection#initialize}.
     def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
       @client_id = client_id
       @logger = logger
 
@@ -45,6 +46,10 @@ module Kafka
       cluster_info.partitions_for(topic)
     end
 
+    def topics
+      cluster_info.topics.map(&:topic_name)
+    end
+
    def shutdown
      @brokers.each do |id, broker|
        @logger.info "Disconnecting broker #{id}"
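The new socket_timeout keyword is also accepted by Client#initialize in the next file, which forwards it here. A minimal sketch, assuming Kafka.new forwards its keywords to Client#initialize as the README example suggests:

```ruby
require "kafka"
require "logger"

# socket_timeout travels from the client to the broker pool and on to each
# connection; its exact semantics are documented on Connection#initialize,
# which is not part of this diff. The value 10 is illustrative.
kafka = Kafka.new(
  seed_brokers: ["kafka1:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr),
  socket_timeout: 10,
)
```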
data/lib/kafka/client.rb
CHANGED
@@ -18,21 +18,35 @@ module Kafka
     #
     # @return [Client]
     def initialize(seed_brokers:, client_id:, logger:, socket_timeout: nil)
-      @seed_brokers = seed_brokers
-      @client_id = client_id
       @logger = logger
-
+
+      @broker_pool = BrokerPool.new(
+        seed_brokers: seed_brokers,
+        client_id: client_id,
+        logger: logger,
+        socket_timeout: socket_timeout,
+      )
     end
 
+    # Builds a new producer.
+    #
+    # +options+ are passed to {Producer#initialize}.
+    #
+    # @see Producer#initialize
+    # @return [Producer] the Kafka producer.
     def get_producer(**options)
-      broker_pool
-
-
-
-
+      Producer.new(broker_pool: @broker_pool, logger: @logger, **options)
+    end
+
+    # Lists all topics in the cluster.
+    #
+    # @return [Array<String>] the list of topic names.
+    def topics
+      @broker_pool.topics
+    end
 
-
+    def close
+      @broker_pool.shutdown
    end
  end
end
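Taken together, the client now owns a single BrokerPool and exposes three entry points: get_producer, topics, and close. A minimal usage sketch built only from methods in this diff (option values are illustrative):

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(
  seed_brokers: ["kafka1:9092", "kafka2:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr),
)

# Options given to get_producer are forwarded to Producer#initialize.
producer = kafka.get_producer(max_buffer_size: 500)

# The new topics method returns the names of all topics in the cluster.
puts kafka.topics.inspect

# close shuts down the underlying broker pool and its connections.
kafka.close
```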
data/lib/kafka/producer.rb
CHANGED
@@ -8,19 +8,19 @@ module Kafka
   #
   # == Buffering
   #
-  # The producer buffers pending messages until {#
+  # The producer buffers pending messages until {#send_messages} is called. Note that there is
   # a maximum buffer size (default is 1,000 messages) and writing messages after the
   # buffer has reached this size will result in a BufferOverflow exception. Make sure
-  # to periodically call {#
+  # to periodically call {#send_messages} or set +max_buffer_size+ to an appropriate value.
   #
   # Buffering messages and sending them in batches greatly improves performance, so
-  # try to avoid
+  # try to avoid sending messages after every write. The tradeoff between throughput and
   # message delays depends on your use case.
   #
   # == Error Handling and Retries
   #
   # The design of the error handling is based on having a {MessageBuffer} hold messages
-  # for all topics/partitions. Whenever we want to
+  # for all topics/partitions. Whenever we want to send messages to the cluster, we
   # group the buffered messages by the broker they need to be sent to and fire off a
   # request to each broker. A request can be a partial success, so we go through the
   # response and inspect the error code for each partition that we wrote to. If the
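One way to act on the buffering contract described above is to rescue the overflow, flush, and retry. A minimal sketch, assuming a producer built via get_producer as in the earlier sketches:

```ruby
begin
  producer.produce("hello", topic: "test-messages")
rescue Kafka::BufferOverflow
  # The buffer reached max_buffer_size (1,000 by default); flush it and retry.
  producer.send_messages
  retry
end
```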
@@ -39,7 +39,7 @@ module Kafka
     #
     # @param logger [Logger]
     #
-    # @param
+    # @param ack_timeout [Integer] The number of seconds a broker can wait for
     #   replicas to acknowledge a write before responding with a timeout.
     #
     # @param required_acks [Integer] The number of replicas that must acknowledge
@@ -54,19 +54,19 @@ module Kafka
     # @param max_buffer_size [Integer] the number of messages allowed in the buffer
     #   before new writes will raise BufferOverflow exceptions.
     #
-    def initialize(broker_pool:, logger:,
+    def initialize(broker_pool:, logger:, ack_timeout: 10, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
       @broker_pool = broker_pool
       @logger = logger
       @required_acks = required_acks
-      @
+      @ack_timeout = ack_timeout
       @max_retries = max_retries
       @retry_backoff = retry_backoff
       @max_buffer_size = max_buffer_size
       @buffer = MessageBuffer.new
     end
 
-    #
-    # the producer until {#
+    # Produces a message to the specified topic. Note that messages are buffered in
+    # the producer until {#send_messages} is called.
     #
     # == Partitioning
     #
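Since get_producer forwards its options to Producer#initialize, any of these defaults can be overridden per producer. A sketch with illustrative values, assuming the kafka client from the earlier sketches (the retry_backoff doc lies outside this hunk, so its unit is not shown here):

```ruby
producer = kafka.get_producer(
  required_acks: 3,        # replicas that must acknowledge each write
  ack_timeout: 5,          # seconds a broker may wait for those acknowledgements
  max_retries: 5,          # attempts before raising FailedToSendMessages
  retry_backoff: 2,        # pause between retry attempts
  max_buffer_size: 10_000, # messages allowed before BufferOverflow is raised
)
```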
@@ -94,7 +94,7 @@ module Kafka
     #
     # @raise [BufferOverflow] if the maximum buffer size has been reached.
     # @return [nil]
-    def
+    def produce(value, key: nil, topic:, partition: nil, partition_key: nil)
       unless buffer_size < @max_buffer_size
         raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
       end
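The new produce signature lets the partition be given explicitly or derived from a partition_key (the == Partitioning docs referenced above are elided from this hunk). A short sketch with illustrative values, assuming messages sharing a partition_key are routed to the same partition:

```ruby
# Pin a message to partition 0 explicitly:
producer.produce("hello1", key: "x", topic: "test-messages", partition: 0)

# Or let the producer derive the partition; messages with the same
# partition_key should land in the same partition:
producer.produce("hello2", topic: "test-messages", partition_key: "user-42")
```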
@@ -113,20 +113,20 @@ module Kafka
       partition
     end
 
-    #
+    # Sends all buffered messages to the Kafka brokers.
     #
     # Depending on the value of +required_acks+ used when initializing the producer,
     # this call may block until the specified number of replicas have acknowledged
-    # the writes. The +
-    # the call will block before failing.
+    # the writes. The +ack_timeout+ setting places an upper bound on the amount of
+    # time the call will block before failing.
     #
     # @raise [FailedToSendMessages] if not all messages could be successfully sent.
     # @return [nil]
-    def
+    def send_messages
       attempt = 0
 
       loop do
-        @logger.info "
+        @logger.info "Sending #{@buffer.size} messages"
 
         attempt += 1
         transmit_messages
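When the internal retry loop is exhausted, callers see FailedToSendMessages. A minimal handling sketch, assuming the producer from the earlier sketches; note that per the hunk further below, only successfully written partitions are cleared from the buffer, so a later send_messages can retry the rest:

```ruby
begin
  producer.send_messages
rescue Kafka::FailedToSendMessages => e
  # Some messages could not be written after all retry attempts. They remain
  # buffered, so a subsequent send_messages call can try again.
  logger.error "Delivery failed: #{e}"
end
```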
@@ -166,6 +166,9 @@ module Kafka
       @buffer.size
     end
 
+    # Closes all connections to the brokers.
+    #
+    # @return [nil]
     def shutdown
       @broker_pool.shutdown
     end
@@ -191,7 +194,7 @@ module Kafka
       response = broker.produce(
         messages_for_topics: message_set.to_h,
         required_acks: @required_acks,
-        timeout: @
+        timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
       )
 
       handle_response(response) if response
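The unit conversion is worth spelling out: ack_timeout is configured in seconds on the producer, while the protocol field is milliseconds, so the default of 10 goes out as 10_000 on the wire:

```ruby
ack_timeout = 10                  # seconds, as configured on the producer
timeout     = ack_timeout * 1000  # => 10_000 milliseconds, as Kafka expects
```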
@@ -229,7 +232,7 @@ module Kafka
           @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
         else
           offset = partition_info.offset
-          @logger.info "Successfully
+          @logger.info "Successfully sent messages for #{topic}/#{partition}; new offset is #{offset}"
 
           # The messages were successfully written; clear them from the buffer.
           @buffer.clear_messages(topic: topic, partition: partition)
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.0.pre.beta2
+  version: 0.1.0.pre.beta3
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-01-
+date: 2016-01-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
 
@@ -66,8 +66,38 @@ dependencies:
   - - ">="
   - !ruby/object:Gem::Version
     version: '0'
-
-
+- !ruby/object:Gem::Dependency
+  name: dotenv
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+- !ruby/object:Gem::Dependency
+  name: docker-api
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+description: |-
+  A client library for the Kafka distributed commit log.
+
+  Currently, only the Producer API is implemented.
 email:
 - daniel.schierbeck@gmail.com
 executables: []