kafkat-onfocusio 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +10 -0
  6. data/CHANGELOG.md +12 -0
  7. data/Gemfile +2 -0
  8. data/LICENSE.txt +202 -0
  9. data/README.md +72 -0
  10. data/Rakefile +25 -0
  11. data/_kafkat +21 -0
  12. data/bin/kafkat +4 -0
  13. data/kafkat.gemspec +34 -0
  14. data/lib/kafkat/cli.rb +71 -0
  15. data/lib/kafkat/cluster/assignment.rb +4 -0
  16. data/lib/kafkat/cluster/broker.rb +4 -0
  17. data/lib/kafkat/cluster/partition.rb +11 -0
  18. data/lib/kafkat/cluster/topic.rb +4 -0
  19. data/lib/kafkat/cluster.rb +4 -0
  20. data/lib/kafkat/command/brokers.rb +16 -0
  21. data/lib/kafkat/command/clean-indexes.rb +30 -0
  22. data/lib/kafkat/command/cluster_restart.rb +336 -0
  23. data/lib/kafkat/command/controller.rb +18 -0
  24. data/lib/kafkat/command/drain.rb +109 -0
  25. data/lib/kafkat/command/elect-leaders.rb +31 -0
  26. data/lib/kafkat/command/partitions.rb +50 -0
  27. data/lib/kafkat/command/reassign.rb +80 -0
  28. data/lib/kafkat/command/resign-rewrite.rb +76 -0
  29. data/lib/kafkat/command/set-replication-factor.rb +173 -0
  30. data/lib/kafkat/command/shutdown.rb +30 -0
  31. data/lib/kafkat/command/topics.rb +16 -0
  32. data/lib/kafkat/command/verify-reassign.rb +18 -0
  33. data/lib/kafkat/command/verify-replicas.rb +92 -0
  34. data/lib/kafkat/command.rb +70 -0
  35. data/lib/kafkat/config.rb +50 -0
  36. data/lib/kafkat/interface/admin.rb +115 -0
  37. data/lib/kafkat/interface/kafka_logs.rb +54 -0
  38. data/lib/kafkat/interface/zookeeper.rb +178 -0
  39. data/lib/kafkat/interface.rb +3 -0
  40. data/lib/kafkat/reboot.rb +0 -0
  41. data/lib/kafkat/utility/command_io.rb +21 -0
  42. data/lib/kafkat/utility/formatting.rb +68 -0
  43. data/lib/kafkat/utility/logging.rb +7 -0
  44. data/lib/kafkat/utility.rb +4 -0
  45. data/lib/kafkat/version.rb +3 -0
  46. data/lib/kafkat.rb +14 -0
  47. data/spec/factories/topic.rb +53 -0
  48. data/spec/lib/kafkat/command/cluster_restart_spec.rb +197 -0
  49. data/spec/lib/kafkat/command/drain_spec.rb +59 -0
  50. data/spec/lib/kafkat/command/verify-replicas_spec.rb +50 -0
  51. data/spec/spec_helper.rb +102 -0
  52. metadata +294 -0
data/lib/kafkat/command/cluster_restart.rb
@@ -0,0 +1,336 @@
+module Kafkat
+  module Command
+    class ClusterRestart < Base
+
+      register_as 'cluster-restart'
+
+      usage 'cluster-restart help', 'Determine the server restart sequence for kafka'
+
+      def run
+        subcommand_name = ARGV.shift || 'help'
+        begin
+          subcommand_class = ['Kafkat', 'ClusterRestart', 'Subcommands', subcommand_name.capitalize].inject(Object) do |mod, class_name|
+            mod.const_get(class_name)
+          end
+          subcommand_class.new(config).run
+        rescue NameError
+          print "ERROR: Unknown command #{subcommand_name}"
+        end
+      end
+    end
+  end
+end
+
+module Kafkat
+  module ClusterRestart
+    module Subcommands
+
+      class Help < ::Kafkat::Command::Base
+        def run
+          puts 'cluster-restart help            Print Help and exit'
+          puts 'cluster-restart reset           Clean up the restart state'
+          puts 'cluster-restart start           Initialize the cluster-restart session for the cluster'
+          puts 'cluster-restart next            Calculate the next broker to restart based on the current state'
+          puts 'cluster-restart good <broker>   Mark this broker as successfully restarted'
+          puts 'cluster-restart log             Print the state of the brokers'
+          puts 'cluster-restart restore <file>  Start a new session and restore the state defined in that file'
+        end
+      end
+
+      class Start < ::Kafkat::Command::Base
+
+        attr_reader :session
+
+        def run
+          if Session.exists?
+            puts "ERROR: A session is already started"
+            puts "\n[Action] Please run 'next' or 'reset' commands"
+            exit 1
+          end
+
+          print "Starting a new Cluster-Restart session.\n"
+
+          @session = Session.from_zookeepers(zookeeper)
+          @session.save!
+
+          puts "\n[Action] Please run 'next' to select the broker with lowest restarting cost"
+        end
+      end
+
+      class Reset < ::Kafkat::Command::Base
+
+        def run
+          if Session.exists?
+            Session.reset!
+          end
+          puts "Session reset"
+          puts "\n[Action] Please run 'start' to start the session"
+        end
+      end
+
+      class Restore < ::Kafkat::Command::Base
+
+        attr_reader :session
+
+        def run
+          if Session.exists?
+            puts "ERROR: A session is already started"
+            puts "\n[Action] Please run 'next' or 'reset' commands"
+            exit 1
+          end
+
+          file_name = ARGV[0]
+          @session = Session.load!(file_name)
+          @session.save!
+          puts "Session restored"
+          puts "\n[Action] Please run 'next' to select the broker with lowest restarting cost"
+        end
+      end
+
+      class Next < ::Kafkat::Command::Base
+
+        attr_reader :session, :topics
+
+        def run
+          unless Session.exists?
+            puts "ERROR: no session in progress"
+            puts "\n[Action] Please run 'start' command"
+            exit 1
+          end
+
+          @session = Session.load!
+          if @session.all_restarted?
+            puts "All the brokers have been restarted"
+          else
+            pendings = @session.pending_brokers
+            if pendings.size > 1
+              puts "ERROR Illegal state: multiple brokers are in Pending state"
+              exit 1
+            elsif pendings.size == 1
+              next_broker = pendings[0]
+              puts "Broker #{next_broker} is in Pending state"
+            else
+              @topics = zookeeper.get_topics
+              next_broker, cost = ClusterRestartHelper.select_broker_with_min_cost(session, topics)
+              @session.update_states!(Session::STATE_PENDING, [next_broker])
+              @session.save!
+              puts "The next broker is: #{next_broker}"
+            end
+            puts "\n[Action-1] Restart broker #{next_broker} aka #{zookeeper.get_broker(next_broker).host}"
+            puts "\n[Action-2] Run 'good #{next_broker}' to mark it as restarted."
+          end
+        end
+      end
+
+      class Log < ::Kafkat::Command::Base
+
+        attr_reader :session
+
+        def run
+          unless Session.exists?
+            puts "ERROR: no session in progress"
+            puts "\n[Action] Please run 'start' command"
+            exit 1
+          end
+
+          @session = Session.load!
+          puts JSON.pretty_generate(@session.to_h)
+        end
+      end
+
+      class Good < ::Kafkat::Command::Base
+
+        attr_reader :session
+
+        def run
+          unless Session.exists?
+            puts "ERROR: no session in progress"
+            puts "\n[Action] Please run 'start' command"
+            exit 1
+          end
+
+          broker_id = ARGV[0]
+          if broker_id.nil?
+            puts "ERROR You must specify a broker id"
+            exit 1
+          end
+          restart(broker_id)
+          puts "Broker #{broker_id} has been marked as restarted"
+          puts "\n[Action] Please run 'next' to select the broker with lowest restarting cost"
+        end
+
+        def restart(broker_id)
+          @session = Session.load!
+          begin
+            if session.pending?(broker_id)
+              session.update_states!(Session::STATE_RESTARTED, [broker_id])
+              session.save!
+            else
+              puts "ERROR Broker state is #{session.state(broker_id)}"
+              exit 1
+            end
+          rescue UnknownBrokerError => e
+            puts "ERROR #{e.to_s}"
+            exit 1
+          end
+        end
+      end
+    end
+
+    class UnknownBrokerError < StandardError; end
+    class UnknownStateError < StandardError; end
+
+    class ClusterRestartHelper
+
+      def self.select_broker_with_min_cost(session, topics)
+        broker_to_partition = get_broker_to_leader_partition_mapping(topics)
+        broker_restart_cost = Hash.new(0)
+        session.broker_states.each do |broker_id, state|
+          if state == Session::STATE_NOT_RESTARTED
+            current_cost = calculate_cost(broker_id, broker_to_partition[broker_id], session)
+            broker_restart_cost[broker_id] = current_cost if current_cost != nil
+          end
+        end
+
+        # Sort by cost first, and then broker_id
+        broker_restart_cost.min_by { |broker_id, cost| [cost, broker_id] }
+      end
+
+      def self.get_broker_to_leader_partition_mapping(topics)
+        broker_to_partitions = Hash.new { |h, key| h[key] = [] }
+
+        topics.values.flat_map { |topic| topic.partitions }
+          .each do |partition|
+            broker_to_partitions[partition.leader] << partition
+          end
+        broker_to_partitions
+      end
+
+      def self.calculate_cost(broker_id, partitions, session)
+        raise UnknownBrokerError, "Unknown broker #{broker_id}" unless session.broker_states.key?(broker_id)
+        partitions.find_all { |partition| partition.leader == broker_id }
+          .reduce(0) do |cost, partition|
+            cost += partition.replicas.length
+            cost -= partition.replicas.find_all { |replica| session.restarted?(replica) }.size
+            cost
+          end
+      end
+    end
+
+
+    class Session
+
+      SESSION_PATH = '~/kafkat_cluster_restart_session.json'
+      STATE_RESTARTED = 'restarted' # use String instead of Symbol to facilitate JSON ser/deser
+      STATE_NOT_RESTARTED = 'not_restarted'
+      STATE_PENDING = 'pending'
+      STATES = [STATE_NOT_RESTARTED, STATE_RESTARTED, STATE_PENDING]
+
+      class NotFoundError < StandardError; end
+      class ParseError < StandardError; end
+
+      attr_reader :broker_states
+
+      def self.exists?
+        File.file?(File.expand_path(SESSION_PATH))
+      end
+
+      def self.load!(session_file = SESSION_PATH)
+        path = File.expand_path(session_file)
+        string = File.read(path)
+
+        json = JSON.parse(string)
+        self.new(json)
+
+      rescue Errno::ENOENT
+        raise NotFoundError
+      rescue JSON::JSONError
+        raise ParseError
+      end
+
+      def self.reset!(session_file = SESSION_PATH)
+        path = File.expand_path(session_file)
+        File.delete(path)
+      end
+
+      def self.from_zookeepers(zookeeper)
+        broker_ids = zookeeper.get_broker_ids
+        Session.from_brokers(broker_ids)
+      end
+
+      def self.from_brokers(brokers)
+        states = brokers.each_with_object({}) { |id, h| h[id] = STATE_NOT_RESTARTED }
+        Session.new('broker_states' => states)
+      end
+
+      def initialize(data = {})
+        @broker_states = data['broker_states'] || {}
+      end
+
+      def save!(session_file = SESSION_PATH)
+        File.open(File.expand_path(session_file), 'w') do |f|
+          f.puts JSON.pretty_generate(self.to_h)
+        end
+      end
+
+      def update_states!(state, ids)
+        state = state.to_s if state.is_a?(Symbol)
+        unless STATES.include?(state)
+          raise UnknownStateError, "Unknown State #{state}"
+        end
+
+        intersection = ids & broker_states.keys
+        unless intersection == ids
+          raise UnknownBrokerError, "Unknown brokers: #{(ids - intersection).join(', ')}"
+        end
+
+        ids.each { |id| broker_states[id] = state }
+        self
+      end
+
+      def state(broker_id)
+        raise UnknownBrokerError, "Unknown broker: #{broker_id}" unless @broker_states.key?(broker_id)
+        broker_states[broker_id]
+      end
+
+      def state?(broker_id, state)
+        raise UnknownBrokerError, "Unknown broker: #{broker_id}" unless @broker_states.key?(broker_id)
+        raise UnknownStateError, "Unknown state: #{state}" unless STATES.include?(state)
+        @broker_states[broker_id] == state
+      end
+
+      def pending?(broker_id)
+        state?(broker_id, STATE_PENDING)
+      end
+
+      def not_restarted?(broker_id)
+        state?(broker_id, STATE_NOT_RESTARTED)
+      end
+
+      def restarted?(broker_id)
+        state?(broker_id, STATE_RESTARTED)
+      end
+
+      def all_restarted?
+        @broker_states.values.all? { |state| state == STATE_RESTARTED }
+      end
+
+      def pending_brokers
+        broker_states.keys.find_all do |broker_id|
+          broker_states[broker_id] == STATE_PENDING
+        end
+      end
+
+      def to_h
+        {
+          :broker_states => broker_states,
+        }
+      end
+    end
+  end
+end
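
Note: the restart-cost logic in ClusterRestartHelper above can be read as follows: for each broker not yet restarted, count the replicas of every partition it currently leads and subtract the replicas already marked restarted; the broker with the lowest total (ties broken by broker id) is proposed next. A minimal sketch of that calculation, using a toy Partition struct in place of the real cluster objects:

    # Illustrative only; Partition is a stand-in for Kafkat's partition objects.
    Partition = Struct.new(:leader, :replicas)

    restarted  = ['3']                          # brokers already marked 'restarted'
    partitions = [
      Partition.new('1', ['1', '2', '3']),      # led by broker 1
      Partition.new('2', ['2', '3', '1']),      # led by broker 2
    ]

    # Cost of restarting broker 1: replicas of its led partitions still awaiting a restart.
    cost = partitions
      .select { |p| p.leader == '1' }
      .sum { |p| p.replicas.count { |r| !restarted.include?(r) } }

    puts cost  # => 2 (replicas 1 and 2 still pending; replica 3 is already restarted)

The lower the cost, the fewer in-sync replicas are put at risk by bouncing that broker next.
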
data/lib/kafkat/command/controller.rb
@@ -0,0 +1,18 @@
+module Kafkat
+  module Command
+    class Controller < Base
+      register_as 'controller'
+
+      usage 'controller',
+            'Print the current controller.'
+
+      def run
+        c = zookeeper.get_controller
+        print "The current controller is '#{c.id}' (#{c.host}:#{c.port}).\n"
+      rescue Interface::Zookeeper::NotFoundError
+        print "ERROR: Couldn't determine the current controller.\n"
+        exit 1
+      end
+    end
+  end
+end
data/lib/kafkat/command/drain.rb
@@ -0,0 +1,109 @@
+module Kafkat
+  module Command
+    class Drain < Base
+
+      register_as 'drain'
+
+      usage 'drain <broker id> [--topic <t>] [--brokers <ids>]',
+            'Reassign partitions from a specific broker to destination brokers.'
+
+      # For each partition (of the specified topic) on the source broker, the command
+      # assigns the partition to one of the destination brokers that does not already
+      # have it, keeping the existing brokers to achieve minimal movement of data.
+      # To help distribute data evenly, if more than one destination broker meets the
+      # requirement, the command always chooses the broker with the lowest number of
+      # partitions of the topic involved.
+      #
+      # To find the broker with the lowest number of partitions, the command maintains
+      # a hash table with broker id as key and number of partitions as value. The hash
+      # table is updated as assignments are made.
+      def run
+        source_broker = ARGV[0] && ARGV.shift.to_i
+        if source_broker.nil?
+          puts "You must specify a broker ID."
+          exit 1
+        end
+
+        opts = Trollop.options do
+          opt :brokers, "destination broker IDs", type: :string
+          opt :topic, "topic name to reassign", type: :string
+        end
+
+        topic_name = opts[:topic]
+        topics = topic_name && zookeeper.get_topics([topic_name])
+        topics ||= zookeeper.get_topics
+
+        destination_brokers = opts[:brokers] && opts[:brokers].split(',').map(&:to_i)
+        destination_brokers ||= zookeeper.get_brokers.values.map(&:id)
+        destination_brokers.delete(source_broker)
+
+        active_brokers = zookeeper.get_brokers.values.map(&:id)
+
+        unless (inactive_brokers = destination_brokers - active_brokers).empty?
+          print "ERROR: Broker #{inactive_brokers} are not currently active.\n"
+          exit 1
+        end
+
+        assignments =
+          generate_assignments(source_broker, topics, destination_brokers)
+
+        print "Num of topics got from zookeeper: #{topics.length}\n"
+        print "Num of partitions in the assignment: #{assignments.size}\n"
+        prompt_and_execute_assignments(assignments)
+      end
+
+      def generate_assignments(source_broker, topics, destination_brokers)
+        assignments = []
+        topics.each do |_, t|
+          partitions_by_broker = build_partitions_by_broker(t, destination_brokers)
+
+          t.partitions.each do |p|
+            if p.replicas.include? source_broker
+              replicas_size = p.replicas.length
+              replicas = p.replicas - [source_broker]
+              source_broker_is_leader = p.replicas.first == source_broker
+              potential_broker_ids = destination_brokers - replicas
+              if potential_broker_ids.empty?
+                print "ERROR: Not enough destination brokers to reassign topic \"#{t.name}\".\n"
+                exit 1
+              end
+
+              num_partitions_on_potential_broker =
+                partitions_by_broker.select { |id, _| potential_broker_ids.include? id }
+              assigned_broker_id = num_partitions_on_potential_broker.min_by { |id, num| num }[0]
+              if source_broker_is_leader
+                replicas.unshift(assigned_broker_id)
+              else
+                replicas << assigned_broker_id
+              end
+              partitions_by_broker[assigned_broker_id] += 1
+
+              if replicas.length != replicas_size
+                STDERR.print "ERROR: Number of replicas changes after reassignment topic: #{t.name}, partition: #{p.id}\n"
+                exit 1
+              end
+
+              assignments << Assignment.new(t.name, p.id, replicas)
+            end
+          end
+        end
+
+        assignments
+      end
+
+      # Build a hash map from broker id to the number of partitions on it, to facilitate
+      # finding the broker with the lowest number of partitions and keep brokers balanced.
+      def build_partitions_by_broker(topic, destination_brokers)
+        partitions_by_broker = Hash.new(0)
+        destination_brokers.each { |id| partitions_by_broker[id] = 0 }
+        topic.partitions.each do |p|
+          p.replicas.each do |r|
+            partitions_by_broker[r] += 1
+          end
+        end
+        partitions_by_broker
+      end
+    end
+  end
+end
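
Note: the destination choice described in the comment at the top of drain.rb boils down to: among destination brokers that do not already hold a replica of the partition, take the one with the fewest partitions of that topic so far. A small sketch of that selection step (toy numbers, not the real cluster state):

    # Replicas of the topic currently on each destination broker.
    partitions_by_broker = { 2 => 4, 3 => 1, 4 => 2 }
    replicas             = [1, 3]     # current replica set; broker 1 is being drained

    candidates  = partitions_by_broker.keys - replicas            # skip brokers that already hold it
    destination = candidates.min_by { |id| partitions_by_broker[id] }

    puts destination  # => 4 (broker 3 already has a replica; broker 4 is less loaded than 2)

In the command itself, the chosen broker's count is then incremented, so later partitions of the same topic spread across the remaining destinations.
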
data/lib/kafkat/command/elect-leaders.rb
@@ -0,0 +1,31 @@
+module Kafkat
+  module Command
+    class ElectLeaders < Base
+      register_as 'elect-leaders'
+
+      usage 'elect-leaders [topic]',
+            'Begin election of the preferred leaders.'
+
+      def run
+        topic_name = ARGV[0] && ARGV.shift
+        topic_names = topic_name && [topic_name]
+
+        topics = zookeeper.get_topics(topic_names)
+        partitions = topics.values.map(&:partitions).flatten
+
+        topics_s = topic_name ? "'#{topic_name}'" : "all topics"
+        print "This operation elects the preferred replicas for #{topics_s}.\n"
+        return unless agree("Proceed (y/n)?")
+
+        result = nil
+        begin
+          print "\nBeginning.\n"
+          result = admin.elect_leaders!(partitions)
+          print "Started.\n"
+        rescue Interface::Admin::ExecutionFailedError
+          print result
+        end
+      end
+    end
+  end
+end
data/lib/kafkat/command/partitions.rb
@@ -0,0 +1,50 @@
+module Kafkat
+  module Command
+    class Describe < Base
+      register_as 'partitions'
+
+      usage 'partitions [topic]',
+            'Print partitions by topic.'
+      usage 'partitions [topic] --under-replicated',
+            'Print partitions by topic (only under-replicated).'
+      usage 'partitions [topic] --unavailable',
+            'Print partitions by topic (only unavailable).'
+
+      def run
+        topic_name = ARGV.shift unless ARGV[0] && ARGV[0].start_with?('--')
+        topic_names = topic_name && [topic_name]
+
+        @options = Trollop.options do
+          opt :under_replicated, "only under-replicated"
+          opt :unavailable, "only unavailable"
+        end
+
+        brokers = zookeeper.get_brokers
+        topics = zookeeper.get_topics(topic_names)
+
+        print_partition_header
+        topics.each do |name, t|
+          t.partitions.each do |p|
+            print_partition(p) if selected?(p, brokers)
+          end
+        end
+      end
+
+      private
+
+      def selected?(partition, brokers)
+        return partition.under_replicated? if only_under_replicated?
+        return !partition.has_leader?(brokers) if only_unavailable?
+        true
+      end
+
+      def only_under_replicated?
+        !!@options[:under_replicated]
+      end
+
+      def only_unavailable?
+        !!@options[:unavailable]
+      end
+    end
+  end
+end
data/lib/kafkat/command/reassign.rb
@@ -0,0 +1,80 @@
+module Kafkat
+  module Command
+    class Reassign < Base
+      register_as 'reassign'
+
+      usage 'reassign [topics] [--brokers <ids>] [--replicas <n>]',
+            'Begin reassignment of partitions.'
+
+      def run
+        topic_names = ARGV.shift unless ARGV[0] && ARGV[0].start_with?('--')
+
+        all_brokers = zookeeper.get_brokers
+
+        topics = nil
+        if topic_names
+          topics_list = topic_names.split(',')
+          topics = zookeeper.get_topics(topics_list)
+        end
+        topics ||= zookeeper.get_topics
+
+        opts = Trollop.options do
+          opt :brokers, "replica set (broker IDs)", type: :string
+          opt :replicas, "number of replicas (count)", type: :integer
+        end
+
+        broker_ids = opts[:brokers] && opts[:brokers].split(',').map(&:to_i)
+        replica_count = opts[:replicas]
+
+        broker_ids ||= zookeeper.get_brokers.values.map(&:id)
+
+        all_brokers_id = all_brokers.values.map(&:id)
+        broker_ids.each do |id|
+          if !all_brokers_id.include?(id)
+            print "ERROR: Broker #{id} is not currently active.\n"
+            exit 1
+          end
+        end
+
+        # *** This logic is duplicated from Kafka 0.8.1.1 ***
+
+        assignments = []
+        broker_count = broker_ids.size
+
+        topics.each do |_, t|
+          # This is how Kafka's AdminUtils determines these values.
+          partition_count = t.partitions.size
+          topic_replica_count = replica_count || t.partitions[0].replicas.size
+
+          if topic_replica_count > broker_count
+            print "ERROR: Replication factor (#{topic_replica_count}) is larger than brokers (#{broker_count}).\n"
+            exit 1
+          end
+
+          start_index = Random.rand(broker_count)
+          replica_shift = Random.rand(broker_count)
+
+          t.partitions.each do |p|
+            replica_shift += 1 if p.id > 0 && p.id % broker_count == 0
+            first_replica_index = (p.id + start_index) % broker_count
+
+            replicas = [broker_ids[first_replica_index]]
+
+            (0...topic_replica_count - 1).each do |i|
+              shift = 1 + (replica_shift + i) % (broker_count - 1)
+              index = (first_replica_index + shift) % broker_count
+              replicas << broker_ids[index]
+            end
+
+            replicas.reverse!
+            assignments << Assignment.new(t.name, p.id, replicas)
+          end
+        end
+
+        # ****************
+
+        prompt_and_execute_assignments(assignments)
+      end
+    end
+  end
+end
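
Note: the AdminUtils logic duplicated above walks partitions round-robin from a random start index and offsets the follower replicas by a shift that grows each time the partition id wraps around the broker list. A minimal sketch of the same index arithmetic with fixed (non-random) offsets, before the command's final replicas.reverse!:

    broker_ids    = [0, 1, 2, 3]
    broker_count  = broker_ids.size
    replica_count = 2
    start_index   = 1   # Random.rand(broker_count) in the command itself
    replica_shift = 0   # Random.rand(broker_count) in the command itself

    (0...6).each do |partition_id|
      replica_shift += 1 if partition_id > 0 && partition_id % broker_count == 0
      first = (partition_id + start_index) % broker_count

      replicas = [broker_ids[first]]
      (0...replica_count - 1).each do |i|
        shift = 1 + (replica_shift + i) % (broker_count - 1)
        replicas << broker_ids[(first + shift) % broker_count]
      end

      puts "partition #{partition_id}: #{replicas.inspect}"
      # => partition 0: [1, 2] ... partition 4: [1, 3] once the shift has advanced
    end

Leaders (the first replica) rotate evenly across the broker list, and the growing shift keeps follower sets from always pairing the same brokers.
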
data/lib/kafkat/command/resign-rewrite.rb
@@ -0,0 +1,76 @@
+module Kafkat
+  module Command
+    class ResignForce < Base
+      register_as 'resign-rewrite'
+
+      usage 'resign-rewrite <broker id>',
+            'Forcibly rewrite leaderships to exclude a broker.'
+
+      usage 'resign-rewrite <broker id> --force',
+            'Same as above but proceed if there are no available ISRs.'
+
+      def run
+        broker_id = ARGV[0] && ARGV.shift.to_i
+        if broker_id.nil?
+          puts "You must specify a broker ID."
+          exit 1
+        end
+
+        opts = Trollop.options do
+          opt :force, "force"
+        end
+
+        print "This operation rewrites leaderships in ZK to exclude broker '#{broker_id}'.\n"
+        print "WARNING: This is a last resort. Try the 'shutdown' command first!\n\n".red
+
+        return unless agree("Proceed (y/n)?")
+
+        brokers = zookeeper.get_brokers
+        topics = zookeeper.get_topics
+        force = opts[:force]
+
+        ops = {}
+        topics.each do |_, t|
+          t.partitions.each do |p|
+            next if p.leader != broker_id
+
+            alternates = p.isr.reject { |i| i == broker_id }
+            new_leader_id = alternates.sample
+
+            if !new_leader_id && !force
+              print "Partition #{t.name}-#{p.id} has no other ISRs!\n"
+              exit 1
+            end
+
+            new_leader_id ||= -1
+            ops[p] = new_leader_id
+          end
+        end
+
+        print "\n"
+        print "Summary of the new assignments:\n\n"
+
+        print "Partition\tLeader\n"
+        ops.each do |p, lid|
+          print justify("#{p.topic_name}-#{p.id}")
+          print justify(lid.to_s)
+          print "\n"
+        end
+
+        begin
+          print "\nStarting.\n"
+          ops.each do |p, lid|
+            retryable(tries: 3, on: Interface::Zookeeper::WriteConflictError) do
+              zookeeper.write_leader(p, lid)
+            end
+          end
+        rescue Interface::Zookeeper::WriteConflictError => e
+          print "Failed to update leaderships in ZK. Try re-running.\n\n"
+          exit 1
+        end
+
+        print "Done.\n"
+      end
+    end
+  end
+end