kafka_syrup 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
module KafkaSyrup
  module Protocol
    # Kafka Offset API request (api_key 2, version 0).
    # Carries a list of topics, each holding partition queries that specify
    # the (time, max_offsets) parameters for the offset lookup.
    class OffsetRequest < Request
      self.api_key = 2

      attr_accessor :topics

      # Initial attribute values applied by Request#initialize.
      def defaults
        { topics: [] }
      end

      # Appends a new, empty topic entry and returns it so the caller can
      # add partition queries to it.
      def add_topic(name)
        Topic.new(name, []).tap { |t| topics << t }
      end

      # Serializes the request body: replica id followed by the topic array.
      def encode
        super { E.write_int32(REPLICA_ID) + E.write_array(topics) }
      end

      Topic = Struct.new(:name, :partitions) do
        # Adds a partition query (defaults: time -1 = latest, one offset back).
        def add_partition(id, time = -1, max_offsets = 1)
          Partition.new(id, time, max_offsets).tap { |p| partitions << p }
        end

        # Topic name followed by its partition array.
        def encode
          E.write_string(name) + E.write_array(partitions)
        end
      end

      Partition = Struct.new(:id, :time, :max_offsets) do
        # int32 partition id, int64 time, int32 max offsets to return.
        def encode
          E.write_int32(id) + E.write_int64(time) + E.write_int32(max_offsets)
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module KafkaSyrup
  module Protocol
    # Kafka Offset API response: per-topic, per-partition error codes and
    # the offsets returned by the broker.
    class OffsetResponse < Response
      attr_accessor :topics

      # Initial attribute values applied on construction.
      def defaults
        { topics: [] }
      end

      # Appends a new, empty topic entry and returns it.
      def add_topic(name)
        Topic.new(name, []).tap { |t| topics << t }
      end

      # Serializes the response body (the topic array).
      def encode
        super { E.write_array(topics) }
      end

      # Reads the topic array from +io+, then raises (via
      # KafkaResponseErrors) for any partition error code present.
      def decode(io)
        super
        self.topics = E.read_array(io, &Topic.method(:decode))
        codes = topics.flat_map(&:partitions).map(&:code)
        codes.each { |code| KafkaResponseErrors.raise_from_code(code) }
      end

      Topic = Struct.new(:name, :partitions) do
        # Appends a decoded/constructed partition entry and returns it.
        def add_partition(id, code, offsets)
          Partition.new(id, code, offsets).tap { |p| partitions << p }
        end

        # Topic name followed by its partition array.
        def encode
          E.write_string(name) + E.write_array(partitions)
        end

        # Reads one topic: string name, then the partition array.
        def self.decode(io)
          name = E.read_string(io)
          parts = E.read_array(io, &Partition.method(:decode))
          new(name, parts)
        end
      end

      Partition = Struct.new(:id, :code, :offsets) do
        # int32 id, int16 error code, array of int64 offsets.
        def encode
          E.write_int32(id) + E.write_int16(code) + E.write_array(offsets, &E.method(:write_int64))
        end

        # Reads one partition entry in the same field order as #encode.
        def self.decode(io)
          id = E.read_int32(io)
          code = E.read_int16(io)
          offsets = E.read_array(io, &E.method(:read_int64))
          new(id, code, offsets)
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module KafkaSyrup
  module Protocol
    # Kafka Produce API request (api_key 0, version 0).
    # Carries required_acks/timeout settings plus a list of topics, each
    # holding partitions whose payload is a MessageSet.
    class ProduceRequest < Request
      self.api_key = 0

      attr_accessor :required_acks, :timeout, :topics

      # Initial attribute values; acks and timeout come from global config.
      def defaults
        {
          required_acks: config.produce_required_acks,
          timeout: config.produce_timeout,
          topics: []
        }
      end

      # Appends a new, empty topic entry and returns it.
      def add_topic(name)
        Topic.new(name, []).tap { |t| topics << t }
      end

      # required_acks (int16), timeout (int32), then the topic array.
      def encode
        super do
          E.write_int16(required_acks) + E.write_int32(timeout) + E.write_array(topics)
        end
      end

      Topic = Struct.new(:name, :partitions) do
        # Appends a partition with a fresh, empty MessageSet and returns it.
        def add_partition(id)
          Partition.new(id, MessageSet.new).tap { |p| partitions << p }
        end

        # Topic name followed by its partition array.
        def encode
          E.write_string(name) + E.write_array(partitions)
        end
      end

      Partition = Struct.new(:id, :message_set) do
        # Convenience accessor for the underlying message list.
        def messages
          message_set.messages
        end

        # Builds a Message from +value+ (plus any extra options), queues it
        # on the message set, and returns it.
        def add_message(value = nil, opts = {})
          Message.new(opts.merge(value: value)).tap { |m| message_set.messages << m }
        end

        # int32 id, int32 byte length of the encoded message set, then the
        # encoded set itself.
        def encode
          body = message_set.encode
          E.write_int32(id) + E.write_int32(body.length) + body
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module KafkaSyrup
  module Protocol
    # Kafka Produce API response: per-topic, per-partition error codes and
    # the offset assigned to the appended messages.
    class ProduceResponse < Response
      attr_accessor :topics

      # Initial attribute values applied on construction.
      def defaults
        { topics: [] }
      end

      # Appends a new, empty topic entry and returns it.
      def add_topic(name)
        Topic.new(name, []).tap { |t| topics << t }
      end

      # Serializes the response body (the topic array).
      def encode
        super { E.write_array(topics) }
      end

      # Reads the topic array from +io+, then raises (via
      # KafkaResponseErrors) for any partition error code present.
      def decode(io)
        super
        self.topics = E.read_array(io, &Topic.method(:decode))
        codes = topics.flat_map(&:partitions).map(&:code)
        codes.each { |code| KafkaResponseErrors.raise_from_code(code) }
      end

      Topic = Struct.new(:name, :partitions) do
        # Appends a decoded/constructed partition entry and returns it.
        def add_partition(id, code, offset)
          Partition.new(id, code, offset).tap { |p| partitions << p }
        end

        # Topic name followed by its partition array.
        def encode
          E.write_string(name) + E.write_array(partitions)
        end

        # Reads one topic: string name, then the partition array.
        def self.decode(io)
          name = E.read_string(io)
          parts = E.read_array(io, &Partition.method(:decode))
          new(name, parts)
        end
      end

      Partition = Struct.new(:id, :code, :offset) do
        # int32 id, int16 error code, int64 offset.
        def encode
          E.write_int32(id) + E.write_int16(code) + E.write_int64(offset)
        end

        # Reads one partition entry in the same field order as #encode.
        def self.decode(io)
          id = E.read_int32(io)
          code = E.read_int16(io)
          offset = E.read_int64(io)
          new(id, code, offset)
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module KafkaSyrup
  module Protocol
    # Common framing for all Kafka API requests: the api key/version header,
    # correlation id, and client id, followed by the subclass payload.
    class Request < Base

      class << self
        # Numeric Kafka API key, set by each concrete request subclass.
        attr_accessor :api_key
      end

      attr_accessor :correlation_id, :client_id

      # Applies defaults first so explicit arguments win.
      def initialize(*args)
        load_args(defaults)
        load_args(*args)
      end

      # Instance-level shortcut to the subclass's API key.
      def api_key
        self.class.api_key
      end

      # All requests speak protocol version 0.
      def api_version
        0
      end

      # Writes the shared request header, then the subclass payload
      # (the block), when one was supplied.
      def encode(&block)
        super do
          parts = [
            E.write_int16(api_key),
            E.write_int16(api_version),
            E.write_int32(correlation_id.to_i),
            E.write_string(client_id.to_s)
          ]
          parts << yield if block
          parts.join
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module KafkaSyrup
  module Protocol
    # Common framing for all Kafka API responses: a correlation id followed
    # by the subclass payload.
    class Response < Base

      attr_accessor :correlation_id

      # Writes the correlation id, then the subclass payload (the block),
      # when one was supplied.
      def encode(&block)
        super do
          parts = [E.write_int32(correlation_id.to_i)]
          parts << yield if block
          parts.join
        end
      end

      # Reads the correlation id from +io+ after the base framing.
      def decode(io)
        super(io)
        self.correlation_id = E.read_int32(io)
      end
    end
  end
end
@@ -0,0 +1,301 @@
1
+ require 'zk'
2
+
3
module KafkaSyrup
  # High-level consumer for a single topic within a consumer group.
  #
  # Coordinates group membership through Zookeeper: it registers itself under
  # the group's membership path, claims a share of the topic's partitions,
  # tracks/commits offsets in ZK, and runs one background fetcher thread per
  # claimed partition. Fetched messages funnel into a single local Queue that
  # #fetch drains.
  class TopicConsumer
    include Utils

    attr_accessor :topic, :group, :consumer_id, :watcher, :partitions, :offsets, :lock, :threads, :messages, :control_queues, :offset_mode, :max_bytes, :commit_mode, :uncommitted_partitions

    # Accepts the same splat-args convention as the protocol classes;
    # defaults are applied first so explicit arguments win.
    def initialize(*args)
      load_args(defaults)
      load_args(*args)
    end

    # Initial attribute values:
    # - consumer_id is derived from hostname + object hash so multiple
    #   consumers on one host remain distinct
    # - commit_mode :fetch commits the previous batch's offsets at the start
    #   of the next #fetch call; :auto commits at the end of each #fetch
    def defaults
      {
        consumer_id: "#{Socket.gethostname}-#{hash}",
        partitions: [],
        offsets: {},
        lock: Mutex.new,
        threads: [],
        messages: Queue.new,
        control_queues: [],
        offset_mode: :latest,
        commit_mode: :fetch,
        uncommitted_partitions: []
      }
    end

    # Memoized Zookeeper client; reconnects whenever the cached client is
    # missing, of the wrong type, or no longer connected.
    def zk
      if @zk.is_a?(ZK::Client::Threaded) && @zk.connected?
        @zk
      else
        @zk = ZK.new(KafkaSyrup.config.zookeeper_hosts, chroot: :check)
      end
    end

    # Joins the consumer group: creates the required ZK paths, writes this
    # consumer's (ephemeral) registration node, installs a membership watch
    # that triggers a rebalance, and performs the initial rebalance.
    # No-op when already registered.
    def register
      return if registered?

      # Make sure nodes exist in Zookeeper
      zk.mkdir_p(membership_path)
      zk.mkdir_p(ownership_path)
      zk.mkdir_p(offsets_path)

      begin
        zk.create(registration_path, MultiJson.dump(new_registration_info), ephemeral: true)

      rescue ZK::Exceptions::NodeExists
        # Node already exists (e.g. a second subscription from this process):
        # update it in place; if it vanishes between create and set, retry
        # the whole registration.
        no_node = false
        begin
          zk.set(registration_path, MultiJson.dump(new_registration_info))
        rescue ::ZK::Exceptions::NoNode
          no_node = true
        end
        retry if no_node
      ensure
        # NOTE(review): the ensure block installs the watcher and rebalances
        # even if registration raised — presumably intentional so a racing
        # create/set still ends in a consistent state; confirm.
        self.watcher = zk.register(membership_path) do |event|
          rebalance
          # ZK watches are one-shot; re-arm the children watch after firing.
          zk.children(membership_path, watch: true)
          trigger_fetch
        end
        zk.children(membership_path, watch: true)
        rebalance
        registered?
      end
    end

    # Leaves the group: stops watching membership, kills fetcher threads,
    # clears local state, releases partition claims, and removes this
    # consumer's registration node.
    def unregister
      watcher.unregister
      threads.each(&:kill)
      [threads, messages, control_queues].each(&:clear)
      release_partitions
      zk.rm_rf([membership_path, consumer_id]*'/')
    end

    # Re-divides the topic's partitions among the group's current
    # subscribers and (re)starts fetcher threads for this consumer's share.
    # Raises NotRegistered when called before #register.
    def rebalance
      raise NotRegistered unless registered?

      log.debug "Rebalance triggered on group #{group}"

      # Make sure fetch doesn't attempt to store offsets during a rebalance
      lock.synchronize do
        begin
          # Stop fetcher threads and clear locally cached messages
          threads.each(&:kill)
          [threads, messages, control_queues].each(&:clear)

          # Relinquish client claims to whatever partitions it's currently serving
          release_partitions

          # Determine which partitions to claim
          self.partitions = partition_ids_to_claim

          # Attempt to claim desired partitions in zookeeper
          partitions.each{ |id| zk.create([ownership_path, id]*'/', consumer_id.to_s, ephemeral: true) }

          # Retrieve offsets for successfully claimed partitions
          partitions.each{ |id| offsets[id] = get_offset(id) }

          # Start fetcher threads for partitions
          partitions.each(&method(:start_fetcher_thread))
          sleep 0.01

        rescue ZK::Exceptions::NodeExists
          # It's possible that another consumer has not yet released the partition this client is attempting to claim
          # No biggie - release any partitions this client has already claimed, backoff a bit, and retry
          release_partitions
          sleep 0.2
          retry
        end
      end
    end

    # Drains locally queued messages (waking fetcher threads first when the
    # queue is empty), advances in-memory offsets past what was returned, and
    # commits according to commit_mode. Blocks until at least one message is
    # available. +limit+ caps the number of messages returned.
    # Returns an array of { partition:, offset:, message: } hashes.
    def fetch(limit = nil)
      raise NotRegistered unless registered?

      commit if commit_mode == :fetch

      trigger_fetch if messages.empty?

      results = []

      loop do
        results << messages.pop
        break if messages.empty? || limit && limit == results.count
      end

      # Ensure rebalancing isn't adjusting the offsets
      lock.synchronize do
        results.each do |msg|
          # Next fetch resumes one past the last delivered offset.
          self.offsets[msg[:partition]] = msg[:offset] + 1
          self.uncommitted_partitions |= [msg[:partition]]
        end
      end

      commit if commit_mode == :auto

      results
    end

    # Drops this consumer's ZK ownership claims and clears local partition
    # and offset bookkeeping.
    def release_partitions
      partitions.each{ |id| zk.rm_rf([ownership_path, id]*'/') }

      partitions.clear
      offsets.clear
    end

    # Reads the committed offset for partition +id+ from ZK.
    # Returns nil when no offset has been committed yet.
    def get_offset(id)
      offset, _ = zk.get([offsets_path, id]*'/')
      offset.to_i
    rescue ZK::Exceptions::NoNode
      nil
    end

    # Persists +offset+ for partition +id+ in ZK, creating the node on first
    # commit. If the node appears between set and create (another writer),
    # retry the set.
    def set_offset(id, offset)
      log.debug "Committing offset #{offset} of partition #{id}"
      zk.set([offsets_path, id]*'/', offset.to_s)
    rescue ZK::Exceptions::NoNode
      node_exists = false
      begin
        zk.create([offsets_path, id]*'/', offset.to_s)
      rescue ZK::Exceptions::NodeExists
        node_exists = true
      end
      retry if node_exists
    end

    # Writes the current in-memory offset of every partition touched since
    # the last commit, then marks everything committed.
    def commit
      uncommitted_partitions.each do |id|
        set_offset(id, offsets[id])
      end

      uncommitted_partitions.clear
    end

    # Lists group members whose registration info shows a subscription to
    # this topic.
    def subscribers
      zk.children(membership_path).select do |member|
        info, _ = zk.get([membership_path, member]*'/')

        MultiJson.load(info)['subscription'][topic.to_s].to_i > 0
      end
    end

    # True when this consumer's ZK registration exists and records at least
    # one subscription to this topic.
    def registered?
      info = current_registration_info
      info.is_a?(Hash) && info['subscription'].is_a?(Hash) && info['subscription'][topic.to_s].to_i > 0
    end

    # Builds the registration payload to write to ZK, incrementing this
    # topic's subscription count on top of any existing registration.
    def new_registration_info
      info = current_registration_info || {
        'pattern' => 'static',
        'version' => 1,
        'subscription' => {}
      }

      info['subscription'][topic.to_s] = info['subscription'][topic.to_s].to_i + 1

      info
    end

    # Fetches and parses this consumer's registration node from ZK.
    # Returns nil when the node does not exist.
    def current_registration_info
      info, _ = zk.get(registration_path)

      MultiJson.load(info)
    rescue ZK::Exceptions::NoNode
      nil
    end

    # ZK path for this consumer group; collapses accidental double slashes.
    def group_path
      ([KafkaSyrup.config.zookeeper_path, 'consumers', group]*'/').gsub(/\/\/+/, '/')
    end

    # ZK path holding one node per registered group member.
    def membership_path
      [group_path, 'ids']*'/'
    end

    # ZK path holding partition ownership claims for this topic.
    def ownership_path
      [group_path, 'owners', topic.to_s]*'/'
    end

    # ZK path holding committed offsets for this topic.
    def offsets_path
      [group_path, 'offsets', topic.to_s]*'/'
    end

    # This consumer's own membership node.
    def registration_path
      [membership_path, consumer_id]*'/'
    end

    private

    # Deterministically divides the topic's partitions among the sorted list
    # of subscribers: each consumer gets floor(n/consumers) partitions, with
    # the first (n % consumers) consumers taking one extra. Because every
    # member computes the same assignment, no coordination is needed beyond
    # the ephemeral ownership claims.
    def partition_ids_to_claim
      topic_partitions = KafkaSyrup.get_metadata.topics.detect{ |t| t.name == topic.to_s }.partitions

      consumers = subscribers.sort

      partitions_per_consumer = topic_partitions.size / consumers.size

      extra_partitions = topic_partitions.size % consumers.size

      consumer_position = consumers.index(consumer_id.to_s)

      starting_index = consumer_position * partitions_per_consumer + [consumer_position, extra_partitions].min

      num_to_claim = partitions_per_consumer

      num_to_claim += 1 if consumer_position + 1 <= extra_partitions

      topic_partitions.map(&:id).sort.slice(starting_index, num_to_claim)
    end

    # Spawns the background fetcher for partition +id+. The thread blocks on
    # its control queue until #trigger_fetch pushes a token, then polls the
    # broker until at least one message arrives, pushing results onto the
    # shared +messages+ queue. All broker errors are swallowed with a
    # backoff so the loop never dies.
    def start_fetcher_thread(id)
      # Initialize new control queue
      q = Queue.new

      # Initialize consumer for this partition; fall back to offset_mode
      # (:latest/:earliest) when no offset has been committed yet.
      opts = { topic: topic, partition: id, offset: offsets[id] || offset_mode }
      opts[:max_bytes] = max_bytes if max_bytes
      consumer = PartitionConsumer.new(opts)

      # Fetch on a thread for concurrency
      threads << Thread.new do
        log.debug "Starting Fetcher Thread for partition #{id}"
        loop do
          begin
            q.pop # wait for start message
            log.debug "Fetching from partition #{id}"

            num_received = 0
            while num_received == 0
              begin
                consumer.fetch_from_broker do |msg|
                  messages.push partition: id, offset: msg.offset, message: msg.value
                  num_received += 1
                end

                # No messages received means that the partition has no messages to consume at this time so wait a bit before trying again
                sleep retry_backoff if num_received == 0
              rescue
                sleep retry_backoff
                retry
              end
            end
          rescue
            sleep retry_backoff
          end
        end
      end

      # Store control queue for later use to control the fetcher thread
      control_queues << q
    end

    # Configured retry backoff, converted from milliseconds to seconds.
    def retry_backoff
      @retry_backoff ||= KafkaSyrup.config.retry_backoff / 1000.0
    end

    # Wakes every fetcher thread that is currently parked on its control
    # queue (num_waiting > 0); threads mid-fetch are left alone.
    def trigger_fetch
      control_queues.reject{ |q| q.num_waiting == 0 }.each{ |q| q.push(:fetch) }
    end
  end
end