rafka 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5019dd9c483b224317fbe12739d09911aeb754d8
4
- data.tar.gz: 154374a9978be5f4cae89173a3cf24d351ed6f97
3
+ metadata.gz: 4c22c53b79ce4831fa5c2e643323e384dfc8ebf6
4
+ data.tar.gz: b40a73a324c70879236a954371aad5c26a8d73fc
5
5
  SHA512:
6
- metadata.gz: d7a35203337af99729c17b77f458ef97f50a4716acbf67bde3e78f055f651242e4931e038a38822dc09c310764282eb3a7f26b371b5be0d331b9a0b2355dae25
7
- data.tar.gz: b75978a87e338b87655bd01cca2bc54c964cecfa5c960e9f893541bc8c57d3ff5d8d3a611fac229d4a5756447d3cbd7a4e195982bb0e70af9a21901e120f818c
6
+ metadata.gz: e9ba330571a10ea65cc73ec2b6b813f6c5ac3bf2e2ef7c33649886929d407523ec6ac8d888f8a2e3d46d3d03f21b558eb7842535e407ee0de6c982df7caf4251
7
+ data.tar.gz: d99201dec000938566dfc7b0a6d2038fc88574a9cad5b7d921c2536f8ebacc027d0104af69de1050a5e03d01aa8fd398b925f97e85c64a3a2a89f4e1de5e16a0
data/CHANGELOG.md CHANGED
@@ -4,6 +4,14 @@ Breaking changes are prefixed with a "[BREAKING]" label.
4
4
 
5
5
  ## master (unreleased)
6
6
 
7
+
8
+
9
+ ## 0.2.0 (2018-05-04)
10
+
11
+ ### Added
12
+
13
+ - Support for batch consuming [[#12](https://github.com/skroutz/rafka-rb/pull/12)]
14
+
7
15
  ## 0.1.0 (2018-04-24)
8
16
 
9
17
  ### Added
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
- rafka-rb: Ruby driver for Rafka
1
+ rafka-rb
2
2
  ===============================================================================
3
3
  [![Build Status](https://api.travis-ci.org/skroutz/rafka-rb.svg?branch=master)](https://travis-ci.org/skroutz/rafka-rb)
4
4
  [![Gem Version](https://badge.fury.io/rb/rafka.svg)](https://badge.fury.io/rb/rafka)
5
5
  [![Documentation](http://img.shields.io/badge/yard-docs-blue.svg)](http://www.rubydoc.info/github/skroutz/rafka-rb)
6
6
 
7
- rafka-rb is a Ruby client for [Rafka](https://github.com/skroutz/rafka),
8
- providing consumer and producer implementations with simple semantics.
9
- It is backed by [redis-rb](https://github.com/redis/redis-rb).
7
+ rafka-rb is a Ruby client library for [Rafka](https://github.com/skroutz/rafka).
8
+
9
+ It provides Consumer and Producer implementations with simple semantics.
10
10
 
11
11
  Refer to the [API documentation](http://www.rubydoc.info/github/skroutz/rafka-rb)
12
12
  for more information.
@@ -21,6 +21,7 @@ Features
21
21
 
22
22
  - Consumer implementation
23
23
  - consumer groups
24
+ - support for consuming in batches
24
25
  - offsets may be managed automatically or manually
25
26
  - Producer implementation
26
27
  - support for partition hashing key
@@ -1,9 +1,9 @@
1
1
  require "securerandom"
2
2
 
3
3
  module Rafka
4
- # A Kafka consumer that consumes messages from a given Kafka topic
5
- # and belongs to a specific consumer group. Offsets may be commited
6
- # automatically or manually (see {#consume}).
4
+ # A Rafka-backed Kafka consumer that consumes messages from a specific topic
5
+ # and belongs to a specific consumer group. Offsets may be committed
6
+ # automatically or manually.
7
7
  #
8
8
  # @see https://kafka.apache.org/documentation/#consumerapi
9
9
  class Consumer
@@ -43,11 +43,11 @@ module Rafka
43
43
 
44
44
  # Consumes the next message.
45
45
  #
46
- # If :auto_commit is true, offsets are commited automatically.
47
- # In the block form, offsets are commited only if the block executes
46
+ # If :auto_commit is true, offsets are committed automatically.
47
+ # In the block form, offsets are committed only if the block executes
48
48
  # without raising any exceptions.
49
49
  #
50
- # If :auto_commit is false, offsets have to be commited manually using
50
+ # If :auto_commit is false, offsets have to be committed manually using
51
51
  # {#commit}.
52
52
  #
53
53
  # @param timeout [Fixnum] the time in seconds to wait for a message. If
@@ -56,7 +56,7 @@ module Rafka
56
56
  # @yieldparam [Message] msg the consumed message
57
57
  #
58
58
  # @raise [MalformedMessageError] if the message cannot be parsed
59
- # @raise [ConsumeError] if there was any error consuming a message
59
+ # @raise [ConsumeError] if there was a server error
60
60
  #
61
61
  # @return [nil, Message] the consumed message, or nil if there wasn't any
62
62
  #
@@ -67,58 +67,89 @@ module Rafka
67
67
  # @example Consume a message and commit offset if the block does not raise an exception
68
68
  # consumer.consume { |msg| puts "I received #{msg.value}" }
69
69
  def consume(timeout=5)
70
- # redis-rb didn't automatically call `CLIENT SETNAME` until v3.2.2
71
- # (https://github.com/redis/redis-rb/issues/510)
72
- #
73
- # TODO(agis): get rid of this when we drop support for 3.2.1 and before
74
- if !@redis.client.connected? && Gem::Version.new(Redis::VERSION) < Gem::Version.new("3.2.2")
75
- Rafka.wrap_errors do
76
- @redis.client.call([:client, :setname, @redis.id])
77
- end
78
- end
79
-
70
+ set_name!
80
71
  raised = false
81
- msg = nil
82
- setname_attempts = 0
72
+ msg = consume_one(timeout)
83
73
 
84
- begin
85
- Rafka.wrap_errors do
86
- Rafka.with_retry(times: @redis_opts[:reconnect_attempts]) do
87
- msg = @redis.blpop(@topic, timeout: timeout)
88
- end
89
- end
90
- rescue ConsumeError => e
91
- # redis-rb didn't automatically call `CLIENT SETNAME` until v3.2.2
92
- # (https://github.com/redis/redis-rb/issues/510)
93
- #
94
- # this is in case the server restarts while we were performing a BLPOP
95
- #
96
- # TODO(agis): get rid of this when we drop support for 3.2.1 and before
97
- if e.message =~ /Identify yourself/ && setname_attempts < 5
98
- sleep 0.5
99
- @redis.client.call([:client, :setname, @redis.id])
100
- setname_attempts += 1
101
- retry
102
- end
74
+ return nil if !msg
103
75
 
76
+ begin
77
+ yield(msg) if block_given?
78
+ rescue => e
79
+ raised = true
104
80
  raise e
105
81
  end
106
82
 
107
- return if !msg
83
+ msg
84
+ ensure
85
+ commit(msg) if @rafka_opts[:auto_commit] && msg && !raised
86
+ end
87
+
88
+ # Consume a batch of messages.
89
+ #
90
+ # Messages are accumulated in a batch until (a) batch_size number of
91
+ # messages are accumulated or (b) batching_max_sec seconds have passed.
92
+ # When either of the conditions is met the batch is returned.
93
+ #
94
+ # If :auto_commit is true, offsets are committed automatically.
95
+ # In the block form, offsets are committed only if the block executes
96
+ # without raising any exceptions.
97
+ #
98
+ # If :auto_commit is false, offsets have to be committed manually using
99
+ # {#commit}.
100
+ #
101
+ # @note Either one or both of batch_size and batching_max_sec may be
102
+ # provided, but at least one of them is required.
103
+ #
104
+ # @param timeout [Fixnum] the time in seconds to wait for each message
105
+ # @param batch_size [Fixnum] maximum number of messages to accumulate
106
+ # in the batch
107
+ # @param batching_max_sec [Fixnum] maximum time in seconds to wait for
108
+ # messages to accumulate in the batch
109
+ #
110
+ # @yieldparam [Array<Message>] msgs the batch
111
+ #
112
+ # @raise [MalformedMessageError] if a message cannot be parsed
113
+ # @raise [ConsumeError] if there was a server error
114
+ # @raise [ArgumentError] if neither batch_size nor batching_max_sec were
115
+ # provided
116
+ #
117
+ # @return [Array<Message>] the batch
118
+ #
119
+ # @example Consume a batch of 10 messages
120
+ # msgs = consumer.consume_batch(batch_size: 10)
121
+ # msgs.size # => 10
122
+ #
123
+ # @example Accumulate messages for 5 seconds and consume the batch
124
+ # msgs = consumer.consume_batch(batching_max_sec: 5)
125
+ # msgs.size # => 3813
126
+ def consume_batch(timeout: 1.0, batch_size: 0, batching_max_sec: 0)
127
+ if batch_size == 0 && batching_max_sec == 0
128
+ raise ArgumentError, "one of batch_size or batching_max_sec must be greater than 0"
129
+ end
130
+
131
+ set_name!
132
+ raised = false
133
+ start_time = Time.now
134
+ msgs = []
135
+
136
+ loop do
137
+ break if batch_size > 0 && msgs.size >= batch_size
138
+ break if batching_max_sec > 0 && (Time.now - start_time >= batching_max_sec)
139
+ msg = consume_one(timeout)
140
+ msgs << msg if msg
141
+ end
108
142
 
109
143
  begin
110
- msg = Message.new(msg)
111
- yield(msg) if block_given?
144
+ yield(msgs) if block_given?
112
145
  rescue => e
113
146
  raised = true
114
147
  raise e
115
148
  end
116
149
 
117
- msg
150
+ msgs
118
151
  ensure
119
- if msg && !raised && @rafka_opts[:auto_commit]
120
- commit(msg)
121
- end
152
+ commit(*msgs) if @rafka_opts[:auto_commit] && !raised
122
153
  end
123
154
 
124
155
  # Commit offsets for the given messages.
@@ -127,17 +158,18 @@ module Rafka
127
158
  # only the largest offset amongst them is committed.
128
159
  #
129
160
  # @note This is a non-blocking operation; a successful server reply means
130
- # offsets are received by the server and will _eventually_ be committed
131
- # to Kafka.
161
+ # offsets are received by the server and will _eventually_ be submitted
162
+ # to Kafka. It is not guaranteed that offsets will actually be committed
163
+ # in case of failures.
132
164
  #
133
- # @param msgs [Array<Message>] the messages for which to commit offsets
165
+ # @param msgs [Array<Message>] any number of messages for which to commit
166
+ # offsets
134
167
  #
135
- # @raise [ConsumeError] if there was any error commiting offsets
168
+ # @raise [ConsumeError] if there was a server error
136
169
  #
137
- # @return [Hash] the actual offsets sent for commit
138
- # @return [Hash{String=>Hash{Integer=>Integer}}] the actual offsets sent
139
- # for commit.Keys denote the topics while values contain the
140
- # partition=>offset pairs.
170
+ # @return [Hash{String=>Hash{Fixnum=>Fixnum}}] the actual offsets sent
171
+ # to the server for commit. Keys contain topics while values contain
172
+ # the respective partition/offset pairs.
141
173
  def commit(*msgs)
142
174
  tp = prepare_for_commit(*msgs)
143
175
 
@@ -176,7 +208,7 @@ module Rafka
176
208
  #
177
209
  # @param msgs [Array<Message>]
178
210
  #
179
- # @return [Hash{String=>Hash{Integer=>Integer}}] the offsets to be commited.
211
+ # @return [Hash{String=>Hash{Fixnum=>Fixnum}}] the offsets to be committed.
180
212
  # Keys denote the topics while values contain the partition=>offset pairs.
181
213
  def prepare_for_commit(*msgs)
182
214
  tp = Hash.new { |h, k| h[k] = Hash.new(0) }
@@ -189,5 +221,53 @@ module Rafka
189
221
 
190
222
  tp
191
223
  end
224
+
225
+ # redis-rb didn't automatically call `CLIENT SETNAME` until v3.2.2
226
+ # (https://github.com/redis/redis-rb/issues/510)
227
+ #
228
+ # TODO(agis): get rid of this when we drop support for 3.2.1 and before
229
+ def set_name!
230
+ return if @redis.client.connected? && Gem::Version.new(Redis::VERSION) >= Gem::Version.new("3.2.2")
231
+
232
+ Rafka.wrap_errors do
233
+ @redis.client.call([:client, :setname, @redis.id])
234
+ end
235
+ end
236
+
237
+ # @param timeout [Fixnum]
238
+ #
239
+ # @raise [MalformedMessageError]
240
+ #
241
+ # @return [nil, Message]
242
+ def consume_one(timeout)
243
+ msg = nil
244
+ setname_attempts = 0
245
+
246
+ begin
247
+ Rafka.wrap_errors do
248
+ Rafka.with_retry(times: @redis_opts[:reconnect_attempts]) do
249
+ msg = @redis.blpop(@topic, timeout: timeout)
250
+ end
251
+ end
252
+ rescue ConsumeError => e
253
+ # redis-rb didn't automatically call `CLIENT SETNAME` until v3.2.2
254
+ # (https://github.com/redis/redis-rb/issues/510)
255
+ #
256
+ # this is in case the server restarts while we were performing a BLPOP
257
+ #
258
+ # TODO(agis): get rid of this when we drop support for 3.2.1 and before
259
+ if e.message =~ /Identify yourself/ && setname_attempts < 5
260
+ sleep 0.5
261
+ @redis.client.call([:client, :setname, @redis.id])
262
+ setname_attempts += 1
263
+ retry
264
+ end
265
+
266
+ raise e
267
+ end
268
+
269
+ msg = Message.new(msg) if msg
270
+ msg
271
+ end
192
272
  end
193
273
  end
data/lib/rafka/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rafka
2
- VERSION = "0.1.0".freeze
2
+ VERSION = "0.2.0".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Agis Anastasopoulos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-24 00:00:00.000000000 Z
11
+ date: 2018-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis