kafka_syrup 0.9.0

@@ -0,0 +1,53 @@
+module KafkaSyrup
+  module Protocol
+    class OffsetRequest < Request
+      self.api_key = 2
+
+      attr_accessor :topics
+
+      def defaults
+        { topics: [] }
+      end
+
+      def add_topic(name)
+        topic = Topic.new(name, [])
+        topics << topic
+        topic
+      end
+
+      def encode
+        super do
+          [
+            E.write_int32(REPLICA_ID),
+            E.write_array(topics)
+          ].join
+        end
+      end
+
+      Topic = Struct.new(:name, :partitions) do
+        def add_partition(id, time = -1, max_offsets = 1)
+          partition = Partition.new(id, time, max_offsets)
+          partitions << partition
+          partition
+        end
+
+        def encode
+          [
+            E.write_string(name),
+            E.write_array(partitions)
+          ].join
+        end
+      end
+
+      Partition = Struct.new(:id, :time, :max_offsets) do
+        def encode
+          [
+            E.write_int32(id),
+            E.write_int64(time),
+            E.write_int32(max_offsets)
+          ].join
+        end
+      end
+    end
+  end
+end
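The new OffsetRequest (api_key 2) frames the REPLICA_ID field followed by an array of topics, each listing partitions with a time and a max_offsets count. A minimal usage sketch, assuming the load_args helper from Utils accepts a hash of attribute names (that helper is not part of this diff) and using a placeholder topic name:

  request = KafkaSyrup::Protocol::OffsetRequest.new(correlation_id: 1, client_id: 'syrup-example')
  topic = request.add_topic('my_topic')
  topic.add_partition(0)              # time defaults to -1 (latest), max_offsets to 1
  payload = request.encode            # wire bytes, including the common request header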
@@ -0,0 +1,69 @@
+module KafkaSyrup
+  module Protocol
+    class OffsetResponse < Response
+      attr_accessor :topics
+
+      def defaults
+        { topics: [] }
+      end
+
+      def add_topic(name)
+        topic = Topic.new(name, [])
+        topics << topic
+        topic
+      end
+
+      def encode
+        super do
+          E.write_array(topics)
+        end
+      end
+
+      def decode(io)
+        super
+        self.topics = E.read_array(io, &Topic.method(:decode))
+        topics.flat_map(&:partitions).map(&:code).each(&KafkaResponseErrors.method(:raise_from_code))
+      end
+
+      Topic = Struct.new(:name, :partitions) do
+        def add_partition(id, code, offsets)
+          partition = Partition.new(id, code, offsets)
+          partitions << partition
+          partition
+        end
+
+        def encode
+          [
+            E.write_string(name),
+            E.write_array(partitions)
+          ].join
+        end
+
+        def self.decode(io)
+          new(
+            E.read_string(io), # Name
+            E.read_array(io, &Partition.method(:decode)) # Partitions
+          )
+        end
+      end
+
+      Partition = Struct.new(:id, :code, :offsets) do
+        def encode
+          [
+            E.write_int32(id),
+            E.write_int16(code),
+            E.write_array(offsets, &E.method(:write_int64))
+          ].join
+        end
+
+        def self.decode(io)
+          new(
+            E.read_int32(io), # ID
+            E.read_int16(io), # Error Code
+            E.read_array(io, &E.method(:read_int64)) # Offsets
+          )
+        end
+      end
+    end
+  end
+end
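OffsetResponse decodes the same layout in reverse and raises through KafkaResponseErrors for any non-zero partition error code. A rough round-trip sketch, assuming Base's encode and decode are symmetric about the outer framing and that Response inherits a load_args-style constructor (neither is shown in this diff):

  require 'stringio'

  original = KafkaSyrup::Protocol::OffsetResponse.new(correlation_id: 1)
  original.add_topic('my_topic').add_partition(0, 0, [42])   # partition id, error code, offsets

  decoded = KafkaSyrup::Protocol::OffsetResponse.new
  decoded.decode(StringIO.new(original.encode))
  decoded.topics.first.partitions.first.offsets              # => [42]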
@@ -0,0 +1,69 @@
+module KafkaSyrup
+  module Protocol
+    class ProduceRequest < Request
+      self.api_key = 0
+
+      attr_accessor :required_acks, :timeout, :topics
+
+      def defaults
+        {
+          required_acks: config.produce_required_acks,
+          timeout: config.produce_timeout,
+          topics: []
+        }
+      end
+
+      def add_topic(name)
+        topic = Topic.new(name, [])
+        topics << topic
+        topic
+      end
+
+      def encode
+        super do
+          [
+            E.write_int16(required_acks),
+            E.write_int32(timeout),
+            E.write_array(topics)
+          ].join
+        end
+      end
+
+      Topic = Struct.new(:name, :partitions) do
+        def add_partition(id)
+          partition = Partition.new(id, MessageSet.new)
+          partitions << partition
+          partition
+        end
+
+        def encode
+          [
+            E.write_string(name),
+            E.write_array(partitions)
+          ].join
+        end
+      end
+
+      Partition = Struct.new(:id, :message_set) do
+        def messages
+          message_set.messages
+        end
+
+        def add_message(value = nil, opts = {})
+          msg = Message.new(opts.merge(value: value))
+          message_set.messages << msg
+          msg
+        end
+
+        def encode
+          encoded = message_set.encode
+          [
+            E.write_int32(id),
+            E.write_int32(encoded.length),
+            encoded
+          ].join
+        end
+      end
+    end
+  end
+end
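ProduceRequest (api_key 0) nests messages in a MessageSet per partition and length-prefixes the encoded set; required_acks and timeout fall back to the produce_required_acks and produce_timeout config values when not supplied. A minimal sketch, with the same hash-constructor assumption as above and placeholder names:

  request = KafkaSyrup::Protocol::ProduceRequest.new(client_id: 'syrup-example', required_acks: 1, timeout: 1500)
  partition = request.add_topic('my_topic').add_partition(0)
  partition.add_message('hello kafka')
  payload = request.encode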
@@ -0,0 +1,69 @@
+module KafkaSyrup
+  module Protocol
+    class ProduceResponse < Response
+      attr_accessor :topics
+
+      def defaults
+        { topics: [] }
+      end
+
+      def add_topic(name)
+        topic = Topic.new(name, [])
+        topics << topic
+        topic
+      end
+
+      def encode
+        super do
+          E.write_array(topics)
+        end
+      end
+
+      def decode(io)
+        super
+        self.topics = E.read_array(io, &Topic.method(:decode))
+        topics.flat_map(&:partitions).map(&:code).each(&KafkaResponseErrors.method(:raise_from_code))
+      end
+
+      Topic = Struct.new(:name, :partitions) do
+        def add_partition(id, code, offset)
+          partition = Partition.new(id, code, offset)
+          partitions << partition
+          partition
+        end
+
+        def encode
+          [
+            E.write_string(name),
+            E.write_array(partitions)
+          ].join
+        end
+
+        def self.decode(io)
+          new(
+            E.read_string(io), # Name
+            E.read_array(io, &Partition.method(:decode)) # Partitions
+          )
+        end
+      end
+
+      Partition = Struct.new(:id, :code, :offset) do
+        def encode
+          [
+            E.write_int32(id),
+            E.write_int16(code),
+            E.write_int64(offset)
+          ].join
+        end
+
+        def self.decode(io)
+          new(
+            E.read_int32(io), # ID
+            E.read_int16(io), # Code
+            E.read_int64(io)  # Offset
+          )
+        end
+      end
+    end
+  end
+end
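As with OffsetResponse, ProduceResponse#decode raises via KafkaResponseErrors when a partition reports a non-zero error code, so broker-side produce failures surface as exceptions. A hedged sketch (broker_io is a placeholder for an IO positioned at a produce response, and the concrete error classes come from KafkaResponseErrors, which is outside this diff):

  response = KafkaSyrup::Protocol::ProduceResponse.new
  begin
    response.decode(broker_io)
    offset = response.topics.first.partitions.first.offset   # offset assigned to the produced messages
  rescue StandardError => e                                   # actual classes come from KafkaResponseErrors
    warn "produce failed: #{e.message}"
  end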
@@ -0,0 +1,37 @@
+module KafkaSyrup
+  module Protocol
+    class Request < Base
+
+      class << self
+        attr_accessor :api_key
+      end
+
+      attr_accessor :correlation_id, :client_id
+
+      def initialize(*args)
+        load_args(defaults)
+        load_args(*args)
+      end
+
+      def api_key
+        self.class.api_key
+      end
+
+      def api_version
+        0
+      end
+
+      def encode(&block)
+        super do
+          [
+            E.write_int16(api_key),
+            E.write_int16(api_version),
+            E.write_int32(correlation_id.to_i),
+            E.write_string(client_id.to_s),
+            block_given? ? yield : ""
+          ].join
+        end
+      end
+    end
+  end
+end
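Request centralizes the header every concrete request shares: api_key, api_version (fixed at 0), correlation_id, and client_id, with the subclass's body appended from the block passed to encode. A sketch of what a new request type built on it could look like (the class name and api key below are made up for illustration and are not part of the gem):

  module KafkaSyrup
    module Protocol
      # Hypothetical example only, not in this diff.
      class ExampleRequest < Request
        self.api_key = 99                        # illustrative value

        def defaults
          {}
        end

        def encode
          super do
            E.write_string('example payload')    # body written after the shared header
          end
        end
      end
    end
  end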
@@ -0,0 +1,22 @@
+module KafkaSyrup
+  module Protocol
+    class Response < Base
+
+      attr_accessor :correlation_id
+
+      def encode(&block)
+        super do
+          [
+            E.write_int32(correlation_id.to_i),
+            block_given? ? yield : ""
+          ].join
+        end
+      end
+
+      def decode(io)
+        super(io)
+        self.correlation_id = E.read_int32(io)
+      end
+    end
+  end
+end
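Response mirrors that header on the read side: decode first hands off to Base (whose framing is not shown in this diff), then reads the correlation_id, and subclasses continue decoding their own fields from the same io, as OffsetResponse and ProduceResponse do above. A matching hypothetical sketch:

  module KafkaSyrup
    module Protocol
      # Hypothetical example only, not in this diff.
      class ExampleResponse < Response
        attr_accessor :payload

        def decode(io)
          super                          # Base framing plus correlation_id
          self.payload = E.read_string(io)
        end
      end
    end
  end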
@@ -0,0 +1,301 @@
+require 'zk'
+
+module KafkaSyrup
+  class TopicConsumer
+    include Utils
+
+    attr_accessor :topic, :group, :consumer_id, :watcher, :partitions, :offsets, :lock, :threads, :messages, :control_queues, :offset_mode, :max_bytes, :commit_mode, :uncommitted_partitions
+
+    def initialize(*args)
+      load_args(defaults)
+      load_args(*args)
+    end
+
+    def defaults
+      {
+        consumer_id: "#{Socket.gethostname}-#{hash}",
+        partitions: [],
+        offsets: {},
+        lock: Mutex.new,
+        threads: [],
+        messages: Queue.new,
+        control_queues: [],
+        offset_mode: :latest,
+        commit_mode: :fetch,
+        uncommitted_partitions: []
+      }
+    end
+
+    def zk
+      if @zk.is_a?(ZK::Client::Threaded) && @zk.connected?
+        @zk
+      else
+        @zk = ZK.new(KafkaSyrup.config.zookeeper_hosts, chroot: :check)
+      end
+    end
+
+    def register
+      return if registered?
+
+      # Make sure nodes exist in Zookeeper
+      zk.mkdir_p(membership_path)
+      zk.mkdir_p(ownership_path)
+      zk.mkdir_p(offsets_path)
+
+      begin
+        zk.create(registration_path, MultiJson.dump(new_registration_info), ephemeral: true)
+
+      rescue ZK::Exceptions::NodeExists
+        no_node = false
+        begin
+          zk.set(registration_path, MultiJson.dump(new_registration_info))
+        rescue ::ZK::Exceptions::NoNode
+          no_node = true
+        end
+        retry if no_node
+      ensure
+        self.watcher = zk.register(membership_path) do |event|
+          rebalance
+          zk.children(membership_path, watch: true)
+          trigger_fetch
+        end
+        zk.children(membership_path, watch: true)
+        rebalance
+        registered?
+      end
+    end
+
+    def unregister
+      watcher.unregister
+      threads.each(&:kill)
+      [threads, messages, control_queues].each(&:clear)
+      release_partitions
+      zk.rm_rf([membership_path, consumer_id]*'/')
+    end
+
+    def rebalance
+      raise NotRegistered unless registered?
+
+      log.debug "Rebalance triggered on group #{group}"
+
+      # Make sure fetch doesn't attempt to store offsets during a rebalance
+      lock.synchronize do
+        begin
+          # Stop fetcher threads and clear locally cached messages
+          threads.each(&:kill)
+          [threads, messages, control_queues].each(&:clear)
+
+          # Relinquish client claims to whatever partitions it's currently serving
+          release_partitions
+
+          # Determine which partitions to claim
+          self.partitions = partition_ids_to_claim
+
+          # Attempt to claim desired partitions in zookeeper
+          partitions.each{ |id| zk.create([ownership_path, id]*'/', consumer_id.to_s, ephemeral: true) }
+
+          # Retrieve offsets for successfully claimed partitions
+          partitions.each{ |id| offsets[id] = get_offset(id) }
+
+          # Start fetcher threads for partitions
+          partitions.each(&method(:start_fetcher_thread))
+          sleep 0.01
+
+        rescue ZK::Exceptions::NodeExists
+          # It's possible that another consumer has not yet released the partition this client is attempting to claim
+          # No biggie - release any partitions this client has already claimed, back off a bit, and retry
+          release_partitions
+          sleep 0.2
+          retry
+        end
+      end
+    end
+
+    def fetch(limit = nil)
+      raise NotRegistered unless registered?
+
+      commit if commit_mode == :fetch
+
+      trigger_fetch if messages.empty?
+
+      results = []
+
+      loop do
+        results << messages.pop
+        break if messages.empty? || limit && limit == results.count
+      end
+
+      # Ensure rebalancing isn't adjusting the offsets
+      lock.synchronize do
+        results.each do |msg|
+          self.offsets[msg[:partition]] = msg[:offset] + 1
+          self.uncommitted_partitions |= [msg[:partition]]
+        end
+      end
+
+      commit if commit_mode == :auto
+
+      results
+    end
+
+    def release_partitions
+      partitions.each{ |id| zk.rm_rf([ownership_path, id]*'/') }
+
+      partitions.clear
+      offsets.clear
+    end
+
+    def get_offset(id)
+      offset, _ = zk.get([offsets_path, id]*'/')
+      offset.to_i
+    rescue ZK::Exceptions::NoNode
+      nil
+    end
+
+    def set_offset(id, offset)
+      log.debug "Committing offset #{offset} of partition #{id}"
+      zk.set([offsets_path, id]*'/', offset.to_s)
+    rescue ZK::Exceptions::NoNode
+      node_exists = false
+      begin
+        zk.create([offsets_path, id]*'/', offset.to_s)
+      rescue ZK::Exceptions::NodeExists
+        node_exists = true
+      end
+      retry if node_exists
+    end
+
+    def commit
+      uncommitted_partitions.each do |id|
+        set_offset(id, offsets[id])
+      end
+
+      uncommitted_partitions.clear
+    end
+
+    def subscribers
+      zk.children(membership_path).select do |member|
+        info, _ = zk.get([membership_path, member]*'/')
+
+        MultiJson.load(info)['subscription'][topic.to_s].to_i > 0
+      end
+    end
+
+    def registered?
+      info = current_registration_info
+      info.is_a?(Hash) && info['subscription'].is_a?(Hash) && info['subscription'][topic.to_s].to_i > 0
+    end
+
+    def new_registration_info
+      info = current_registration_info || {
+        'pattern' => 'static',
+        'version' => 1,
+        'subscription' => {}
+      }
+
+      info['subscription'][topic.to_s] = info['subscription'][topic.to_s].to_i + 1
+
+      info
+    end
+
+    def current_registration_info
+      info, _ = zk.get(registration_path)
+
+      MultiJson.load(info)
+    rescue ZK::Exceptions::NoNode
+      nil
+    end
+
+    def group_path
+      ([KafkaSyrup.config.zookeeper_path, 'consumers', group]*'/').gsub(/\/\/+/, '/')
+    end
+
+    def membership_path
+      [group_path, 'ids']*'/'
+    end
+
+    def ownership_path
+      [group_path, 'owners', topic.to_s]*'/'
+    end
+
+    def offsets_path
+      [group_path, 'offsets', topic.to_s]*'/'
+    end
+
+    def registration_path
+      [membership_path, consumer_id]*'/'
+    end
+
+    private
+
+    def partition_ids_to_claim
+      topic_partitions = KafkaSyrup.get_metadata.topics.detect{ |t| t.name == topic.to_s }.partitions
+
+      consumers = subscribers.sort
+
+      partitions_per_consumer = topic_partitions.size / consumers.size
+
+      extra_partitions = topic_partitions.size % consumers.size
+
+      consumer_position = consumers.index(consumer_id.to_s)
+
+      starting_index = consumer_position * partitions_per_consumer + [consumer_position, extra_partitions].min
+
+      num_to_claim = partitions_per_consumer
+
+      num_to_claim += 1 if consumer_position + 1 <= extra_partitions
+
+      topic_partitions.map(&:id).sort.slice(starting_index, num_to_claim)
+    end
+
+    def start_fetcher_thread(id)
+      # Initialize new control queue
+      q = Queue.new
+
+      # Initialize consumer for this partition
+      opts = { topic: topic, partition: id, offset: offsets[id] || offset_mode }
+      opts[:max_bytes] = max_bytes if max_bytes
+      consumer = PartitionConsumer.new(opts)
+
+      # Fetch on a thread for concurrency
+      threads << Thread.new do
+        log.debug "Starting Fetcher Thread for partition #{id}"
+        loop do
+          begin
+            q.pop # wait for start message
+            log.debug "Fetching from partition #{id}"
+
+            num_received = 0
+            while num_received == 0
+              begin
+                consumer.fetch_from_broker do |msg|
+                  messages.push partition: id, offset: msg.offset, message: msg.value
+                  num_received += 1
+                end
+
+                # No messages received means the partition has nothing to consume right now, so wait a bit before trying again
+                sleep retry_backoff if num_received == 0
+              rescue
+                sleep retry_backoff
+                retry
+              end
+            end
+          rescue
+            sleep retry_backoff
+          end
+        end
+      end
+
+      # Store control queue for later use to control the fetcher thread
+      control_queues << q
+    end
+
+    def retry_backoff
+      @retry_backoff ||= KafkaSyrup.config.retry_backoff / 1000.0
+    end
+
+    def trigger_fetch
+      control_queues.reject{ |q| q.num_waiting == 0 }.each{ |q| q.push(:fetch) }
+    end
+  end
+end
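TopicConsumer ties the protocol classes together: it registers the consumer in Zookeeper, claims a balanced share of the topic's partitions, runs one fetcher thread per claimed partition into a shared queue, and tracks per-partition offsets that are committed back to Zookeeper. A hedged usage sketch (topic and group names are placeholders, and zookeeper_hosts and the other settings are assumed to have been configured on KafkaSyrup.config beforehand, which is outside this diff):

  consumer = KafkaSyrup::TopicConsumer.new(topic: 'my_topic', group: 'my_group')

  consumer.register                # join the group in Zookeeper and trigger an initial rebalance
  batch = consumer.fetch(10)       # up to 10 messages; with the default commit_mode of :fetch,
                                   # their offsets are committed at the start of the next fetch
  batch.each { |m| puts "#{m[:partition]}/#{m[:offset]}: #{m[:message]}" }

  consumer.commit                  # or commit explicitly
  consumer.unregister              # release partition claims and stop the fetcher threads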