kafkat-onfocusio 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +10 -0
  6. data/CHANGELOG.md +12 -0
  7. data/Gemfile +2 -0
  8. data/LICENSE.txt +202 -0
  9. data/README.md +72 -0
  10. data/Rakefile +25 -0
  11. data/_kafkat +21 -0
  12. data/bin/kafkat +4 -0
  13. data/kafkat.gemspec +34 -0
  14. data/lib/kafkat/cli.rb +71 -0
  15. data/lib/kafkat/cluster/assignment.rb +4 -0
  16. data/lib/kafkat/cluster/broker.rb +4 -0
  17. data/lib/kafkat/cluster/partition.rb +11 -0
  18. data/lib/kafkat/cluster/topic.rb +4 -0
  19. data/lib/kafkat/cluster.rb +4 -0
  20. data/lib/kafkat/command/brokers.rb +16 -0
  21. data/lib/kafkat/command/clean-indexes.rb +30 -0
  22. data/lib/kafkat/command/cluster_restart.rb +336 -0
  23. data/lib/kafkat/command/controller.rb +18 -0
  24. data/lib/kafkat/command/drain.rb +109 -0
  25. data/lib/kafkat/command/elect-leaders.rb +31 -0
  26. data/lib/kafkat/command/partitions.rb +50 -0
  27. data/lib/kafkat/command/reassign.rb +80 -0
  28. data/lib/kafkat/command/resign-rewrite.rb +76 -0
  29. data/lib/kafkat/command/set-replication-factor.rb +173 -0
  30. data/lib/kafkat/command/shutdown.rb +30 -0
  31. data/lib/kafkat/command/topics.rb +16 -0
  32. data/lib/kafkat/command/verify-reassign.rb +18 -0
  33. data/lib/kafkat/command/verify-replicas.rb +92 -0
  34. data/lib/kafkat/command.rb +70 -0
  35. data/lib/kafkat/config.rb +50 -0
  36. data/lib/kafkat/interface/admin.rb +115 -0
  37. data/lib/kafkat/interface/kafka_logs.rb +54 -0
  38. data/lib/kafkat/interface/zookeeper.rb +178 -0
  39. data/lib/kafkat/interface.rb +3 -0
  40. data/lib/kafkat/reboot.rb +0 -0
  41. data/lib/kafkat/utility/command_io.rb +21 -0
  42. data/lib/kafkat/utility/formatting.rb +68 -0
  43. data/lib/kafkat/utility/logging.rb +7 -0
  44. data/lib/kafkat/utility.rb +4 -0
  45. data/lib/kafkat/version.rb +3 -0
  46. data/lib/kafkat.rb +14 -0
  47. data/spec/factories/topic.rb +53 -0
  48. data/spec/lib/kafkat/command/cluster_restart_spec.rb +197 -0
  49. data/spec/lib/kafkat/command/drain_spec.rb +59 -0
  50. data/spec/lib/kafkat/command/verify-replicas_spec.rb +50 -0
  51. data/spec/spec_helper.rb +102 -0
  52. metadata +294 -0
data/lib/kafkat/command/set-replication-factor.rb
@@ -0,0 +1,173 @@
+ module Kafkat
+   module Command
+
+     #
+     # Command to set the replication factor (RF) of a topic.
+     # The command accepts the topic name, the new desired replication factor,
+     # and, when increasing the replication factor, an optional list of broker IDs.
+     #
+     # When reducing the RF, a new partition assignment will be generated by
+     # removing the last replica of every replica set. The leader replica
+     # will not be removed from the replica set, so no leader election is triggered.
+     #
+     # When increasing the RF, a new partition assignment will be generated by allocating
+     # new replicas to every replica set. The new replicas will be assigned to the provided
+     # broker list in a round-robin fashion.
+     # If no broker ID is specified on the command line, all brokers will be used.
+     #
+     #
+     class SetReplicationFactor < Base
+       register_as 'set-replication-factor'
+
+       usage 'set-replication-factor [topic] [--newrf <n>] [--brokers id[,id]]',
+             'Set the replication factor of a topic.'
+
+       def run
+         topic_name = ARGV.shift unless ARGV[0] && ARGV[0].start_with?('--')
+
+         all_brokers = zookeeper.get_brokers
+         topics = topic_name && zookeeper.get_topics([topic_name])
+         topics ||= zookeeper.get_topics
+
+         opts = Trollop.options do
+           opt :brokers, "the comma-separated list of brokers the new partitions must be assigned to", type: :string
+           opt :newrf, "the new replication factor", type: :integer, required: true
+         end
+
+         broker_ids = opts[:brokers] && opts[:brokers].split(',').map(&:to_i)
+         new_rf = opts[:newrf]
+
+         if new_rf < 1
+           puts "ERROR: replication factor is smaller than 1"
+           exit 1
+         end
+
+         broker_ids ||= zookeeper.get_brokers.values.map(&:id)
+
+         all_brokers_id = all_brokers.values.map(&:id)
+         broker_ids.each do |id|
+           if !all_brokers_id.include?(id)
+             puts "ERROR: Broker #{id} is not currently active.\n"
+             exit 1
+           end
+         end
+
+         broker_count = broker_ids.size
+         if new_rf > broker_count
+           puts "ERROR: Replication factor is larger than the number of brokers.\n"
+           exit 1
+         end
+
+         assignments = []
+         topics.each do |_, t|
+           current_rf = t.partitions[0].replicas.size
+           if new_rf < current_rf
+             warn_reduce_brokers if opts[:brokers]
+             assignments += reduce_rf(t, current_rf, new_rf)
+           elsif new_rf > current_rf
+             assignments += increase_rf(t, current_rf, new_rf, broker_ids)
+           end
+         end
+
+         # ****************
+         if assignments.empty?
+           puts "No partition reassignment required"
+         else
+           print "This operation executes the following assignments:\n\n"
+           print_assignment_header
+           assignments.each { |a| print_assignment(a) }
+           print "\n"
+
+           return unless agree("Proceed (y/n)?")
+
+           result = nil
+           begin
+             print "\nBeginning.\n"
+             result = admin.reassign!(assignments)
+             print "Started.\n"
+           rescue Admin::ExecutionFailedError
+             print result
+           end
+         end
+       end
+
+
+       #
+       # For every partition, remove the last replica from the replica list.
+       # If the last replica is the leader, then the previous replica is removed instead.
+       #
+       def reduce_rf(topic, current_rf, new_rf)
+         delta_rf = current_rf - new_rf
+         if current_rf == 1
+           raise 'Current replication factor is 1. Cannot reduce further.'
+         end
+         unless delta_rf > 0
+           raise "New replication factor (#{new_rf}) must be smaller than current replication factor (#{current_rf})"
+         end
+         assignments = []
+         topic.partitions.map do |p|
+           new_replicas = p.replicas
+
+           (0...delta_rf).each do |_|
+             (0...new_replicas.size).each do |i|
+               if new_replicas[new_replicas.size - 1 - i] != p.leader
+                 new_replicas.delete_at(new_replicas.size - 1 - i)
+                 break
+               end
+             end
+           end
+
+           if new_replicas.size != new_rf
+             raise 'Unexpected state'
+           end
+           assignments << Assignment.new(topic.name, p.id, new_replicas)
+         end
+         assignments
+       end
+
+
+       #
+       # For every partition, filter out the brokers that already have a replica for this partition,
+       # then pick (new_rf - current_rf) brokers and assign them new replicas.
+       #
+       # The count of new replicas assigned to each broker is tracked so that new replicas are assigned uniformly.
+       #
+       def increase_rf(topic, current_rf, new_rf, brokers)
+         unless new_rf > current_rf
+           raise 'New replication factor must be greater than the current replication factor'
+         end
+
+         delta_rf = new_rf - current_rf
+         if delta_rf > brokers.size
+           raise "#{delta_rf} new replicas requested for topic #{topic.name} but only #{brokers.size} brokers available"
+         end
+
+         broker_counts = brokers.map { |b| {:id => b, :count => 0} }
+
+         assignments = []
+         topic.partitions.map do |p|
+           existing_replicas = p.replicas
+           pick_from = broker_counts.reject { |b| existing_replicas.include?(b[:id]) }
+           if delta_rf > pick_from.size
+             raise "Cannot create #{delta_rf} new replicas for partition #{p.topic_name}.#{p.id}, not enough brokers"
+           end
+           new_replicas = pick_from.sort { |a, b| a[:count] <=> b[:count] }[0...delta_rf]
+           new_replicas.each { |b| b[:count] += 1 }
+
+           final_replicas = existing_replicas + new_replicas.map { |b| b[:id] }
+
+           assignments << Assignment.new(topic.name, p.id, final_replicas)
+         end
+         assignments
+       end
+
+       def warn_reduce_brokers
+         return if @did_warn_reduce_brokers
+         puts "When reducing the replication factor, the list of specified brokers is ignored."
+         puts "Once the replication factor is set, you can use the reassign command."
+         @did_warn_reduce_brokers = true
+       end
+
+     end
+   end
+ end
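
The replica-selection rule described in the header comment of set-replication-factor.rb can be seen in isolation in the sketch below. It is illustrative only and not part of the gem; the broker IDs and leader are made-up values.

```ruby
# Standalone sketch of the "drop the right-most non-leader replica" rule used
# when reducing the replication factor by one (hypothetical broker IDs).
replicas = [4, 5, 6]   # current replica set for one partition
leader   = 6           # leader broker of that partition

(replicas.size - 1).downto(0) do |i|
  if replicas[i] != leader
    replicas.delete_at(i) # drop the right-most replica that is not the leader
    break
  end
end

p replicas  # => [4, 6]; the leader survives, so no leader election is triggered
```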
data/lib/kafkat/command/shutdown.rb
@@ -0,0 +1,30 @@
+ module Kafkat
+   module Command
+     class Resign < Base
+       register_as 'shutdown'
+
+       usage 'shutdown <broker id>',
+             'Gracefully remove leaderships from a broker (requires JMX).'
+
+       def run
+         broker_id = ARGV[0] && ARGV.shift.to_i
+         if broker_id.nil?
+           puts "You must specify a broker ID."
+           exit 1
+         end
+
+         print "This operation gracefully removes leaderships from broker '#{broker_id}'.\n"
+         return unless agree("Proceed (y/n)?")
+
+         result = nil
+         begin
+           print "\nBeginning shutdown.\n"
+           result = admin.shutdown!(broker_id)
+           print "Started.\n"
+         rescue Interface::Admin::ExecutionFailedError
+           print result
+         end
+       end
+     end
+   end
+ end
data/lib/kafkat/command/topics.rb
@@ -0,0 +1,16 @@
+ module Kafkat
+   module Command
+     class Topics < Base
+       register_as 'topics'
+
+       usage 'topics',
+             'Print all topics.'
+
+       def run
+         topic_names = zookeeper.get_topic_names
+
+         topic_names.each { |name| print_topic_name(name) }
+       end
+     end
+   end
+ end
data/lib/kafkat/command/verify-reassign.rb
@@ -0,0 +1,18 @@
+ module Kafkat
+   module Command
+     class VerifyReassign < Base
+       register_as 'verify-reassign'
+
+       usage 'verify-reassign reassign_YYYY-MM-DDThh:mm:ssZ.json',
+             'Verify reassignment of partitions.'
+
+       def run
+         file_name = ARGV.shift
+
+         all_brokers = zookeeper.get_brokers
+
+         puts admin.verify_reassign(file_name)
+       end
+     end
+   end
+ end
data/lib/kafkat/command/verify-replicas.rb
@@ -0,0 +1,92 @@
+ module Kafkat
+   module Command
+     class VerifyReplicas < Base
+       register_as 'verify-replicas'
+
+       usage 'verify-replicas [--topics] [--broker <id>] [--print-details] [--print-summary]',
+             'Check if all partitions in a topic have the same number of replicas.'
+
+       def run
+         opts = Trollop.options do
+           opt :topics, "topic names", type: :string
+           opt :broker, "broker ID", type: :string
+           opt :print_details, "show replica size of mismatched partitions", :default => false
+           opt :print_summary, "show summary of mismatched partitions", :default => false
+         end
+
+         topic_names = opts[:topics]
+         print_details = opts[:print_details]
+         print_summary = opts[:print_summary]
+
+         if topic_names
+           topics_list = topic_names.split(',')
+           topics = zookeeper.get_topics(topics_list)
+         end
+         topics ||= zookeeper.get_topics
+         broker = opts[:broker] && opts[:broker].to_i
+
+         partition_replica_size, partition_replica_size_stat = verify_replicas(broker, topics)
+
+         print_summary = !print_details || print_summary
+         print_mismatched_partitions(partition_replica_size, partition_replica_size_stat, print_details, print_summary)
+       end
+
+       def verify_replicas(broker, topics)
+         partition_replica_size = {}
+         partition_replica_size_stat = {}
+
+         topics.each do |_, t|
+           partition_replica_size[t.name] = {}
+           partition_replica_size_stat[t.name] = {}
+
+           t.partitions.each do |p|
+             replica_size = p.replicas.length
+
+             next if broker && !p.replicas.include?(broker)
+
+             partition_replica_size_stat[t.name][replica_size] ||= 0
+             partition_replica_size_stat[t.name][replica_size] += 1
+
+             partition_replica_size[t.name][p.id] = replica_size
+           end
+
+         end
+
+         return partition_replica_size, partition_replica_size_stat
+       end
+
+       def print_mismatched_partitions(partition_replica_size, partition_replica_size_stat, print_details, print_summary)
+         topic_column_width = partition_replica_size.keys.max_by(&:length).length
+         if print_details
+           printf "%-#{topic_column_width}s %-10s %-15s %-20s\n", "topic", "partition", "replica_size", "replication_factor"
+
+           partition_replica_size.each do |topic_name, partition|
+             replication_factor = partition_replica_size_stat[topic_name].key(partition_replica_size_stat[topic_name].values.max)
+
+             partition.each do |id, replica_size|
+               if replica_size != replication_factor
+                 printf "%-#{topic_column_width}s %-10d %-15d %-20d\n", topic_name, id, replica_size, replication_factor
+               end
+             end
+           end
+         end
+
+         if print_summary
+           printf "%-#{topic_column_width}s %-15s %-10s %-15s %-20s\n", "topic", "replica_size", "count", "percentage", "replication_factor"
+           partition_replica_size_stat.each do |topic_name, partition|
+             if partition.values.size > 1
+               replication_factor = partition_replica_size_stat[topic_name].key(partition_replica_size_stat[topic_name].values.max)
+               num_partitions = 0.0
+               partition.each { |key, value| num_partitions += value }
+
+               partition.each do |replica_size, cnt|
+                 printf "%-#{topic_column_width}s %-15d %-10d %-15d %-20d\n", topic_name, replica_size, cnt,
+                        (cnt * 100 / num_partitions).to_i, replication_factor
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/kafkat/command.rb
@@ -0,0 +1,70 @@
+ module Kafkat
+   module Command
+     class NotFoundError < StandardError; end
+
+     def self.all
+       @all ||= {}
+     end
+
+     def self.get(name)
+       klass = all[name.downcase]
+       raise NotFoundError if !klass
+       klass
+     end
+
+     class Base
+       include Formatting
+       include CommandIO
+       include Kafkat::Logging
+
+       attr_reader :config
+
+       class << self
+         attr_reader :command_name
+       end
+
+       def self.register_as(name)
+         @command_name = name
+         Command.all[name] = self
+       end
+
+       def self.usages
+         @usages ||= []
+       end
+
+       def self.usage(format, description)
+         usages << [format, description]
+       end
+
+       def initialize(config)
+         @config = config
+       end
+
+       def run
+         raise NotImplementedError
+       end
+
+       def admin
+         @admin ||= begin
+           Interface::Admin.new(config)
+         end
+       end
+
+       def zookeeper
+         @zookeeper ||= begin
+           Interface::Zookeeper.new(config)
+         end
+       end
+
+       def kafka_logs
+         @kafka_logs ||= begin
+           Interface::KafkaLogs.new(config)
+         end
+       end
+     end
+   end
+ end
+
+ # Require all of the commands.
+ command_glob = File.expand_path("../command/*.rb", __FILE__)
+ Dir[command_glob].each { |f| require f }
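
As a quick illustration of the registration machinery in command.rb, the sketch below defines a throwaway command. The `Hello` class is hypothetical and not part of the gem, and it assumes the kafkat library is already loaded.

```ruby
# Hypothetical command: subclassing Base and calling register_as is all that is
# needed for Command.get (used by the CLI) to find it by name.
module Kafkat
  module Command
    class Hello < Base
      register_as 'hello'

      usage 'hello', 'Print a greeting.'

      def run
        puts 'hello from kafkat'
      end
    end
  end
end

Kafkat::Command.get('hello')   # => Kafkat::Command::Hello
Kafkat::Command.get('missing') # raises Kafkat::Command::NotFoundError
```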
data/lib/kafkat/config.rb
@@ -0,0 +1,50 @@
+ module Kafkat
+   class Config
+     CONFIG_PATHS = [
+       '~/.kafkatcfg',
+       '/etc/kafkatcfg'
+     ]
+
+     class NotFoundError < StandardError; end
+     class ParseError < StandardError; end
+
+     attr_reader :kafka_path
+     attr_reader :log_path
+     attr_reader :zk_path
+     attr_reader :json_files_path
+
+     def self.load!
+       string = nil
+       e = nil
+
+       CONFIG_PATHS.each do |rel_path|
+         begin
+           path = File.expand_path(rel_path)
+           string = File.read(path)
+           break
+         rescue => e
+         end
+       end
+
+       raise e if e && string.nil?
+
+       json = JSON.parse(string)
+       self.new(json)
+
+     rescue Errno::ENOENT
+       raise NotFoundError
+     rescue JSON::JSONError
+       raise ParseError
+     end
+
+     def initialize(json)
+       @kafka_path = json['kafka_path']
+       @log_path = json['log_path']
+       @zk_path = json['zk_path']
+       @json_files_path = json['json_files_path']
+       if !@json_files_path || !File.exist?(@json_files_path)
+         raise ArgumentError, "The directory \"json_files_path\": \"#{@json_files_path}\" does not exist."
+       end
+     end
+   end
+ end
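
For reference, a `~/.kafkatcfg` that satisfies the Config class above would look roughly like the sketch below. The paths and ZooKeeper string are placeholder assumptions, not values shipped with the gem; only the four key names come from the code.

```ruby
# Writes a minimal ~/.kafkatcfg. All four keys are read by Config#initialize;
# json_files_path must point at an existing directory or an ArgumentError is raised.
require 'json'

config = {
  'kafka_path'      => '/opt/kafka',              # assumed Kafka install dir (bin/*.sh lives here)
  'log_path'        => '/var/lib/kafka/logs',     # assumed broker data directory
  'zk_path'         => 'zk1:2181,zk2:2181/kafka', # assumed ZooKeeper connect string
  'json_files_path' => '/tmp/kafkat'              # directory for generated reassignment JSON files
}

File.write(File.expand_path('~/.kafkatcfg'), JSON.pretty_generate(config))
```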
data/lib/kafkat/interface/admin.rb
@@ -0,0 +1,115 @@
+ require 'time'
+
+ module Kafkat
+   module Interface
+     class Admin
+       class ExecutionFailedError < StandardError; end
+
+       attr_reader :kafka_path
+       attr_reader :zk_path
+       attr_reader :json_files_path
+
+       def initialize(config)
+         @kafka_path = config.kafka_path
+         @zk_path = config.zk_path
+         @json_files_path = config.json_files_path
+       end
+
+       def elect_leaders!(partitions)
+         file = File.new File.join(@json_files_path, "elect-leaders_#{Time.now.xmlschema}.json"), "w"
+
+         json_partitions = []
+         partitions.each do |p|
+           json_partitions << {
+             'topic' => p.topic_name,
+             'partition' => p.id
+           }
+         end
+
+         json = {'partitions' => json_partitions}
+         file.write(JSON.dump(json))
+         file.close
+
+         puts "Using JSON file: " + file.path
+
+         run_tool(
+           'kafka-preferred-replica-election',
+           '--path-to-json-file', file.path
+         )
+       end
+
+       def reassign!(assignments)
+         file_name = "reassign_#{Time.now.xmlschema}.json"
+         file = File.new File.join(@json_files_path, file_name), "w"
+
+         json_partitions = []
+         assignments.each do |a|
+           json_partitions << {
+             'topic' => a.topic_name,
+             'partition' => a.partition_id,
+             'replicas' => a.replicas
+           }
+         end
+
+         json = {
+           'partitions' => json_partitions,
+           'version' => 1
+         }
+
+         file.write(JSON.dump(json))
+         file.close
+
+         puts "Using JSON file: " + file.path
+         puts "Run this command to check the status: kafkat verify-reassign #{file_name}"
+
+         run_tool(
+           'kafka-reassign-partitions',
+           '--execute',
+           '--reassignment-json-file', file.path
+         )
+       end
+
+       def verify_reassign(file_name)
+         file =
+           if File.exist? file_name
+             File.new file_name
+           else
+             File.new File.join(@json_files_path, file_name)
+           end
+
+         puts "Using JSON file: " + file.path
+
+         run_tool(
+           'kafka-reassign-partitions',
+           '--verify',
+           '--reassignment-json-file', file.path
+         )
+       end
+
+       def shutdown!(broker_id, options={})
+         args = ['--broker', broker_id]
+         args += ['--num.retries', options[:retries]] if options[:retries]
+         args += ['--retry.interval.ms', options[:interval]] if options[:interval]
+
+         run_tool(
+           'kafka-run-class',
+           'kafka.admin.ShutdownBroker',
+           *args
+         )
+       end
+
+       def run_tool(name, *args)
+         path = File.join(kafka_path, "bin/#{name}.sh")
+         # The scripts in the Confluent package do not have .sh extensions.
+         if !File.exist? path
+           path = File.join(kafka_path, "bin/#{name}")
+         end
+         args += ['--zookeeper', "\"#{zk_path}\""]
+         args_string = args.join(' ')
+         result = `#{path} #{args_string}`
+         raise ExecutionFailedError if $?.to_i > 0
+         result
+       end
+     end
+   end
+ end
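
To make the shell-out in run_tool concrete, here is roughly the command line that Admin#reassign! ends up executing. This is a sketch only: the paths, timestamp, and ZooKeeper string below are assumptions for illustration, not values from the gem.

```ruby
# Rebuilds the command string the same way run_tool does (all values are placeholders).
kafka_path = '/opt/kafka'
zk_path    = 'zk1:2181/kafka'
name       = 'kafka-reassign-partitions'
args       = ['--execute', '--reassignment-json-file', '/tmp/kafkat/reassign_2024-01-01T00:00:00Z.json']

path = File.join(kafka_path, "bin/#{name}.sh")
path = File.join(kafka_path, "bin/#{name}") unless File.exist?(path) # Confluent layout has no .sh
args += ['--zookeeper', "\"#{zk_path}\""]

puts "#{path} #{args.join(' ')}"
# => /opt/kafka/bin/kafka-reassign-partitions.sh --execute \
#    --reassignment-json-file /tmp/kafkat/reassign_2024-01-01T00:00:00Z.json --zookeeper "zk1:2181/kafka"
```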
data/lib/kafkat/interface/kafka_logs.rb
@@ -0,0 +1,54 @@
+ module Kafkat
+   module Interface
+     class KafkaLogs
+       UNTRUNCATED_SIZE = 10 * 1024 * 1024 # 10MB
+
+       class NoLogsError < StandardError; end
+       class KafkaRunningError < StandardError; end
+
+       attr_reader :log_path
+
+       def initialize(config)
+         @log_path = config.log_path
+       end
+
+       def clean_indexes!
+         check_exists
+
+         to_remove = []
+         lock_for_write do
+           index_glob = File.join(log_path, '**/*.index')
+           Dir[index_glob].each do |index_path|
+             size = File.size(index_path)
+             to_remove << index_path if size == UNTRUNCATED_SIZE
+           end
+         end
+
+         to_remove.each do |path|
+           print "Removing #{path}.\n"
+           File.unlink(path)
+         end
+
+         to_remove.size
+       end
+
+       private
+
+       def check_exists
+         raise NoLogsError unless File.exist?(log_path)
+       end
+
+       def lock_for_write
+         File.open(lockfile_path, File::CREAT) do |lockfile|
+           locked = lockfile.flock(File::LOCK_EX | File::LOCK_NB)
+           raise KafkaRunningError unless locked
+           yield
+         end
+       end
+
+       def lockfile_path
+         File.join(log_path, '.lock')
+       end
+     end
+   end
+ end