braavos 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +44 -0
- data/README.rdoc +113 -0
- data/Rakefile +23 -0
- data/bin/braavos +20 -0
- data/lib/braavos.rb +179 -0
- data/lib/braavos/cli.rb +82 -0
- data/lib/braavos/command.rb +30 -0
- data/lib/braavos/config.rb +48 -0
- data/lib/braavos/parallel.rb +104 -0
- data/lib/braavos/service.rb +48 -0
- data/lib/braavos/service/cassandra.rb +187 -0
- data/lib/braavos/service/elasticsearch.rb +15 -0
- data/lib/braavos/storage.rb +7 -0
- data/lib/braavos/storage/file.rb +48 -0
- data/lib/braavos/storage/s3.rb +41 -0
- data/lib/braavos/storage/storage_base.rb +48 -0
- data/lib/braavos/template.rb +49 -0
- data/lib/braavos/version.rb +3 -0
- data/template/cassandra/clear_snapshot.sh.erb +1 -0
- data/template/cassandra/create_snapshot.sh.erb +1 -0
- data/template/cassandra/dump_schema.sh.erb +4 -0
- data/template/cassandra/system_bundle.sh.erb +18 -0
- data/template/cassandra/table_bundle_restore.sh.erb +44 -0
- data/template/cassandra/table_bundle_upload.sh.erb +27 -0
- data/test/braavos/command_test.rb +34 -0
- data/test/braavos/config_test.rb +66 -0
- data/test/braavos/parallel_test.rb +29 -0
- data/test/braavos/template_test.rb +36 -0
- data/test/template/nested/inner.sh.erb +1 -0
- data/test/template/simple.txt.erb +1 -0
- data/test/test_helper.rb +17 -0
- metadata +125 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
# Runs a shell script (given as text) with positional arguments, retrying a
# failed run up to NUMBER_OF_TRIES times.
class Braavos::Command
  # Raised when a script run does not succeed; propagated to the caller once
  # every attempt has failed.
  class CommandError < StandardError; end

  # Total number of attempts made before the failure is re-raised.
  NUMBER_OF_TRIES = 3

  # Writes +script+ to a temporary executable file and runs it.
  #
  # script - String containing the script source.
  # input  - Array of arguments passed positionally to the script
  #          (it is splatted into Kernel#system and joined for logging).
  #
  # Returns the truthy result of Kernel#system on success.
  # Raises CommandError when all attempts fail.
  def execute(script, input)
    script_tf = Tempfile.new('brav-cmd')
    script_tf.write(script)
    script_tf.close
    FileUtils.chmod("+rx", script_tf.path)

    execute_with_retry(script, script_tf, input)
  ensure
    # script_tf is nil when Tempfile.new itself raised; guarding here keeps
    # that original error from being masked by a NoMethodError on nil.
    script_tf.unlink if script_tf
  end

  private

  # Runs the already-written script file, re-running it while it keeps
  # failing and attempts remain.
  #
  # script    - String script source (used for log and error messages only).
  # script_tf - Tempfile holding the executable script.
  # input     - Array of positional arguments.
  #
  # Raises CommandError after NUMBER_OF_TRIES failed attempts.
  def execute_with_retry(script, script_tf, input)
    attempt = 0
    begin
      attempt += 1
      Braavos.logger.debug("Attempt #{attempt} - Command Execute: #{script} [#{input.join(' ')}]")
      # Kernel#system returns false on a non-zero exit and nil when the
      # command could not be run at all; both count as failure.
      system(script_tf.path, *input) || raise(CommandError, "Command execution failed: #{script}")
    rescue CommandError
      retry if attempt < NUMBER_OF_TRIES
      raise
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Holds the settings for a run. Known keys of the config hash are copied onto
# accessors; the raw hash stays available through #settings.
class Braavos::Config

  attr_accessor :settings, :name, :environment, :service, :bucket_name, :data_loc, :sync_loc, :temp_loc,
                :discovery, :backup_prefix, :storage_backing, :parallel_jobs

  # config - Hash-like object of settings. Each key that has a matching
  #          writer on this object is assigned; other keys are only
  #          reachable through #settings.
  #
  # Raises RuntimeError when the temp location is not a directory after the
  # attempt to create it.
  def initialize(config)
    @settings = config
    config.each do |key, value|
      writer = "#{key}="
      send(writer, value) if respond_to?(writer)
    end

    # A blank prefix is treated the same as no prefix at all.
    self.backup_prefix = nil if backup_prefix && backup_prefix.strip.empty?
    self.storage_backing ||= 's3'
    self.temp_loc ||= ''

    # NOTE(review): with the '' default, FileUtils.mkdir_p('') is expected to
    # raise Errno::ENOENT before the explicit check below runs -- confirm
    # whether a clearer error (or a real default directory) is wanted.
    FileUtils.mkdir_p temp_loc unless File.directory?(temp_loc)
    raise "Missing temp location: #{temp_loc}" unless File.directory?(temp_loc)
  end

  # Returns the constant Braavos::Service::<Service>, where <Service> is the
  # capitalized +service+ setting. The lookup result is memoized.
  def service_class
    @service_class ||= Object.const_get("Braavos::Service::#{service.capitalize}")
  end

  # Returns the constant Braavos::Storage::<Backing>, where <Backing> is the
  # capitalized +storage_backing+ setting. The lookup result is memoized.
  def storage_class
    @storage_class ||= Object.const_get("Braavos::Storage::#{storage_backing.capitalize}")
  end

  # Returns the path built from backup_prefix, name, environment and service,
  # skipping any that are nil. The result is memoized.
  #
  # reset - when true, the memoized value is discarded and rebuilt.
  def backup_path(reset=false)
    # Assigning nil (rather than remove_instance_variable) is safe even when
    # nothing has been memoized yet; remove_instance_variable raises
    # NameError for an undefined variable.
    @backup_path = nil if reset
    @backup_path ||= File.join(*[backup_prefix, name, environment, service].compact)
  end

  # Builds a Regexp from the setting stored under +key+.
  #
  # A value wrapped in slashes ("/foo/") has the slashes removed first.
  # Returns nil when the setting is missing or is two characters or shorter.
  #
  # NOTE(review): the length check also discards legitimate one- and
  # two-character patterns (e.g. "ks") -- confirm that this is intended.
  def get_regex(key)
    pattern = settings[key]
    return nil unless pattern && pattern.size > 2

    pattern = pattern[1...-1] if pattern[0] == '/' && pattern[-1] == '/'
    /#{pattern}/
  end

end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
3
|
+
# Runs a script once per input row through GNU parallel, retrying failed
# jobs once via parallel's --resume-failed.
class Braavos::Parallel

  attr_accessor :parallel_cmd, :parallel_jobs

  # Installing GNU parallel from source:
  #wget http://ftp.gnu.org/gnu/parallel/parallel-latest.tar.bz2
  #tar -xvjf parallel*
  #cd parallel*
  #./configure && make && sudo make install

  # command       - name or path of the GNU parallel executable.
  # parallel_jobs - value passed to parallel's -j option.
  #
  # Raises ArgumentError when parallel or timeout/gtimeout cannot be run.
  def initialize(command = "parallel", parallel_jobs = "100%")
    @parallel_cmd = command
    @parallel_jobs = parallel_jobs

    validate_parallel_installed
    validate_timeout_installed
  end

  # Runs +script+ through parallel, one job per element of +input+.
  #
  # script  - String script source; written to a temporary executable file.
  # input   - collection of job arguments. When its first element is an
  #           Array, every element is treated as a row of positional
  #           arguments; otherwise each element is the single argument.
  # options - Hash; :parallel_opts is inserted verbatim into the command.
  #
  # Returns nil on success.
  # Raises StandardError when jobs still fail after the retry pass.
  def execute(script, input, options={})
    script_temp_file = Tempfile.new('brav-parl-scr')
    script_temp_file.write(script)
    script_temp_file.close
    FileUtils.chmod("+rx", script_temp_file.path)

    input_files = generate_input_files(input)
    input_file_names = input_files.map(&:path).join(' ')

    command = generate_command(script_temp_file, input_file_names, options)
    Braavos.logger.info("Parallel Execute: #{command}")
    rval = execute_command(command)

    unless rval
      Braavos.logger.info("Retrying failed parallel jobs.")
      retry_command = generate_command(script_temp_file, input_file_names, options, true)
      rval = execute_command(retry_command)
    end

    raise StandardError, "Failed to run backup due to errored jobs" unless rval
  ensure
    # Either variable is nil when the failure happened before it was assigned.
    input_files.each(&:close!) if input_files
    # Remove the script file as well; previously it was only closed and never
    # explicitly deleted.
    script_temp_file.close! if script_temp_file
  end

  # Writes the job arguments to temporary files, one file per argument
  # position and one line per job.
  #
  # Returns an Array of Tempfile objects; the caller is responsible for
  # removing them.
  def generate_input_files(input)
    first = input.first

    if first.is_a?(Array)
      # Exclusive range: exactly one file per column. An inclusive range
      # would add a trailing file containing only blank lines.
      (0...first.size).map do |column|
        write_input_file('brav-parl-inp', input.map { |row| row[column] })
      end
    else
      [write_input_file('parl-inp', input)]
    end
  end

  # Builds the parallel command line.
  #
  # script_temp_file - Tempfile holding the executable script.
  # input_file_names - String of space-separated input file paths.
  # options          - Hash; :parallel_opts is inserted verbatim.
  # resume_failed    - when true, adds --resume-failed so that parallel
  #                    consults the job log written by the earlier run.
  #
  # Returns the command String.
  def generate_command(script_temp_file, input_file_names, options, resume_failed = false)
    joblog_path = "/tmp/braavos_joblog.txt"
    resume_flag = resume_failed ? " --resume-failed" : ""

    # Uses parallel_cmd so that the executable validated in #initialize is
    # the one that actually runs.
    "#{parallel_cmd} --no-notice --joblog #{joblog_path}#{resume_flag} -j #{parallel_jobs} #{options[:parallel_opts] || ''} --xapply #{script_temp_file.path} :::: #{input_file_names} "
  end

  # Runs +command+ and logs a warning on a non-zero exit status.
  #
  # Returns true when the exit status is 0, false otherwise.
  def execute_command(command)
    system(command)
    success = $?.exitstatus == 0
    Braavos.logger.warn("Parallel execution failed with status #{$?.exitstatus}") unless success
    success
  end

  # Raises ArgumentError when running "<parallel_cmd> --version" raises.
  def validate_parallel_installed
    `#{parallel_cmd} --version`
  rescue StandardError
    raise ArgumentError, "GNU parallel not available"
  end

  # Raises ArgumentError when neither "timeout --version" nor
  # "gtimeout --version" can be run.
  def validate_timeout_installed
    `timeout --version`
  rescue StandardError
    begin
      `gtimeout --version`
    rescue StandardError
      raise ArgumentError, "timeout from GNU coreutils is not available"
    end
  end

  private

  # Creates a Tempfile named with +prefix+ and writes each element of
  # +lines+ to it on its own line. Returns the Tempfile.
  def write_input_file(prefix, lines)
    tempfile = Tempfile.new(prefix)
    File.open(tempfile.path, 'w') do |f|
      lines.each { |line| f.puts line }
    end
    tempfile
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Namespace for the backup services, plus helpers that identify the local
# node. Each helper computes its value once and caches it on the module.
module Braavos::Service
  autoload :Cassandra, 'braavos/service/cassandra'
  autoload :Elasticsearch, 'braavos/service/elasticsearch'

  class << self

    # Returns the lower-cased instance id: read from the metadata URL when
    # discovery is 'ec2', otherwise the output of `hostname -s`.
    # The value is logged when it is first computed.
    def instance_id
      @instance_id ||= begin
        id = if Braavos.config.discovery == 'ec2'
          `curl -s http://169.254.169.254/latest/meta-data/instance-id`.chomp.downcase
        else
          `hostname -s`.chomp.downcase
        end
        Braavos.logger.info "Instance ID: #{id}"
        id
      end
    end

    # Returns the public IPv4 address read from the metadata URL when
    # discovery is 'ec2', otherwise "127.0.0.1".
    def public_host_ip
      @public_host_ip ||= begin
        if Braavos.config.discovery == 'ec2'
          `curl -s http://169.254.169.254/latest/meta-data/public-ipv4`.chomp.downcase
        else
          "127.0.0.1"
        end
      end
    end

    # Returns the local IPv4 address read from the metadata URL when
    # discovery is 'ec2', otherwise "127.0.0.1".
    def local_host_ip
      @local_host_ip ||= begin
        if Braavos.config.discovery == 'ec2'
          `curl -s http://169.254.169.254/latest/meta-data/local-ipv4`.chomp.downcase
        else
          "127.0.0.1"
        end
      end
    end

    # Formats +time+ with +format+ to produce a backup identifier.
    #
    # time   - object responding to strftime (defaults to the current time).
    # format - strftime pattern (defaults to "YYYYMMDD-HHMMSS").
    def full_backup_id(time=Time.now, format="%Y%m%d-%H%M%S")
      time.strftime(format)
    end
  end
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
require 'tmpdir'
|
2
|
+
|
3
|
+
# Backup and restore of Cassandra data directories through Braavos.storage,
# Braavos.command and Braavos.parallel.
class Braavos::Service::Cassandra

  # Keyspaces treated as system keyspaces when no regex is configured.
  DEFAULT_KEYSPACES_SYSTEM = ['system', 'system_auth', 'system_traces']

  # Returns the memoized output of #safely_retrieve_token.
  def token
    @token ||= safely_retrieve_token
  end

  # Returns the names of the entries directly under the configured data
  # location (memoized).
  def keyspaces
    @keyspaces ||= begin
      # File.basename avoids interpolating data_loc into a regex, which
      # broke on regex metacharacters or a trailing slash in data_loc.
      found = Dir[File.join(Braavos.config.data_loc, '*')].map { |dir| File.basename(dir) }
      Braavos.logger.debug("Found cassandra keyspaces: #{found}")
      found
    end
  end

  # Returns the data keyspaces (memoized): those matching the
  # 'cassandra_keyspaces_data' regex when one is configured, otherwise all
  # keyspaces except DEFAULT_KEYSPACES_SYSTEM.
  def keyspaces_data
    @keyspaces_data ||= begin
      if regex = Braavos.config.get_regex('cassandra_keyspaces_data')
        Braavos.logger.debug("Cassandra Data Keyspaces: regex = #{regex}")
        keyspaces.select { |k| regex =~ k }
      else
        keyspaces - DEFAULT_KEYSPACES_SYSTEM
      end
    end
  end

  # Returns the system keyspaces (memoized): those matching the
  # 'cassandra_keyspaces_system' regex when one is configured, otherwise the
  # members of DEFAULT_KEYSPACES_SYSTEM that are present.
  def keyspaces_system
    @keyspaces_system ||= begin
      if regex = Braavos.config.get_regex('cassandra_keyspaces_system')
        Braavos.logger.debug("Cassandra System Keyspaces: regex = #{regex}")
        keyspaces.select { |k| regex =~ k }
      else
        DEFAULT_KEYSPACES_SYSTEM & keyspaces
      end
    end
  end

  # Performs a full backup:
  #   1. writes cluster/node metadata, the system bundle and schema dumps,
  #   2. snapshots every data keyspace,
  #   3. writes contents.json listing every table bundle in the snapshot,
  #   4. uploads only the bundles not already listed under the data path,
  #   5. writes the _COMPLETED marker.
  # On any error a _FAILED marker is attempted and the error is re-raised.
  # The clear-snapshot script is always run afterwards for every data
  # keyspace.
  def backup_full
    backup_id = Braavos::Service.full_backup_id
    backup_path = File.join(Braavos.config.backup_path, 'full', backup_id, Braavos.storage.find_node_id)
    data_path = File.join(Braavos.config.backup_path, 'data', Braavos.storage.find_node_id)

    if Braavos.storage.has_success?(backup_path)
      raise "Backup currently exists: #{backup_id} - #{Braavos.storage.script_path(backup_path)}"
    else
      Braavos.storage.clear_result(backup_path)
    end

    Dir.mktmpdir('brav-bkup') do |tmpd|
      File.write(File.join(tmpd, 'cluster.json'), JSON.pretty_generate(Braavos.storage.get_cluster))

      write_whoami(tmpd)
      keyspaces_data.each do |keyspace|
        write_describering(tmpd, keyspace)
      end

      Braavos.command.execute(Braavos.template.load_template('cassandra/system_bundle.sh.erb'), [tmpd, Braavos.config.data_loc, *keyspaces_system])

      Braavos.parallel.execute(Braavos.template.load_template('cassandra/dump_schema.sh.erb', local_host_ip: Braavos::Service.local_host_ip), keyspaces_data.map{|k| [k, tmpd]})

      # Everything gathered in the temp dir is stored with the backup.
      Dir["#{tmpd}/*"].each do |f|
        Braavos.storage.write_file(File.join(backup_path, File.basename(f)), file: f)
      end
    end

    script = Braavos.template.load_template('cassandra/create_snapshot.sh.erb')
    keyspaces_data.each do |keyspace|
      Braavos.command.execute(script, [keyspace, backup_id])
    end

    table_list = list_tables("snapshots/#{backup_id}")

    contents = { tables: table_list.map { |t| "#{t[1]}.tgz" } }
    Braavos.storage.write_file(File.join(backup_path, 'contents.json'), JSON.pretty_generate(contents))

    # remove tables that are unchanged
    # (data_path is escaped so that it is matched literally)
    data_prefix = /\A#{Regexp.escape(data_path)}\//
    data_listings = Braavos.storage.list_dir(data_path).map do |k, _v|
      k.sub(data_prefix, '').sub(/\.tgz\Z/, '')
    end
    table_list.delete_if do |t|
      data_listings.include? t[1]
    end
    Braavos.logger.debug("executing table_list: #{table_list}")
    Braavos.logger.info("Processing Table Count: #{table_list.size}")

    script = Braavos.template.load_template('cassandra/table_bundle_upload.sh.erb')
    script_input = table_list.map do |t|
      [t[0], Braavos.storage.script_path(File.join(data_path, "#{t[1]}.tgz"))]
    end
    Braavos.parallel.execute(script, script_input) if script_input.size > 0

    Braavos.storage.write_file(File.join(backup_path, '_COMPLETED'), '')
  rescue => e
    begin
      Braavos.storage.write_file(File.join(backup_path, '_FAILED'), "#{e.message}\n#{e.backtrace}")
    rescue StandardError
      # Best effort only: the original error is what gets reported.
      Braavos.logger.error("_FAILED failed, ignoring")
    end
    raise e
  ensure
    script = Braavos.template.load_template('cassandra/clear_snapshot.sh.erb')
    keyspaces_data.each do |keyspace|
      begin
        Braavos.command.execute(script, [keyspace, backup_id])
      rescue StandardError
        # Best effort only: a failed cleanup must not hide the backup result.
        Braavos.logger.error("clear snapshot #{keyspace} failed, ignoring")
      end
    end
  end

  # Restores every table bundle listed in the backup's contents.json.
  #
  # backup_loc  - backup location relative to the configured backup path.
  # restore_loc - local directory under which the tables are restored.
  def restore(backup_loc, restore_loc)
    backup_path = File.join(Braavos.config.backup_path, backup_loc, Braavos.storage.find_node_id)
    data_path = File.join(Braavos.config.backup_path, 'data', Braavos.storage.find_node_id)

    # The result is unused; presumably this load fails when the backup never
    # completed -- TODO confirm against the storage implementation.
    Braavos.storage.load_file(File.join(backup_path, '_COMPLETED'))

    contents = JSON.parse(Braavos.storage.load_file(File.join(backup_path, 'contents.json')))

    script = Braavos.template.load_template('cassandra/table_bundle_restore.sh.erb')
    script_input = contents['tables'].map do |t|
      [Braavos.storage.script_path(File.join(data_path, t)), File.join(restore_loc, File.dirname(t))]
    end
    Braavos.parallel.execute(script, script_input) if script_input.size > 0

    Braavos.logger.info("Restore completed: #{backup_loc} to #{restore_loc}")
  end

  # Empty method body; returns nil.
  def restore_incr

  end

  # Lists the sstables found under +location+ inside the data directory.
  #
  # Returns an Array of unique [disk_loc, 'table_name'] pairs.
  def list_tables(location)
    # Example for table_name:
    # /usr/local/var/lib/cassandra/data/place_directory_development/Places/snapshots/TODAY/place_directory_development-Places-ib-1-CompressionInfo.db
    # returned as "place_directory_development/Places/place_directory_development-Places-ib-1"

    # Built once, outside the loop. The interpolated parts are escaped so
    # they match literally, and a trailing slash on data_loc is dropped
    # because File.join collapses it in the globbed paths.
    data_root = Regexp.escape(Braavos.config.data_loc.chomp('/'))
    table_pattern = /\A#{data_root}\/([\w\/]+)\/#{Regexp.escape(location)}\/([-\w\.]+)\Z/

    tables = []
    Dir[File.join(Braavos.config.data_loc, '**', location, '*')].each do |path|
      next if path =~ /\.json\Z/ # Some versions of cassandra use Table and Table.index json files in the sstable storage location

      # Drop the trailing "-<Component>.<ext>" so that all component files
      # of one sstable collapse to the same entry.
      sstable = path.sub(/-[\w.]+\Z/, '')
      if match = sstable.match(table_pattern)
        ks_table, ssfile = match.captures
        tables << [sstable, File.join(ks_table, ssfile)]
      else
        Braavos.logger.warn("Found unexpected file in snapshot: #{sstable}")
      end
    end
    tables.uniq
  end

  # Returns the snapshot directories found under the data location,
  # excluding paths whose last segment starts with an underscore.
  def find_snapshots
    `find #{Braavos.config.data_loc} -type d -wholename '*/snapshots/*'`.split("\n").reject { |s| s =~ /\/_.*$/ }
  end

  private

  # Writes the output of `nodetool describering <keyspace>` to
  # <directory>/<keyspace>_describering.txt.
  def write_describering(directory, keyspace)
    path = File.join(directory, "#{keyspace}_describering.txt")
    File.open(path, "w") do |f|
      f.puts `nodetool describering #{keyspace}`
    end
  end

  # Writes this node's instance id, host addresses and token to
  # <directory>/whoami.txt.
  def write_whoami(directory)
    path = File.join(directory, "whoami.txt")
    File.open(path, "w") do |f|
      f.puts "instance_id:#{Braavos::Service.instance_id}"
      f.puts "local_host_ip:#{Braavos::Service.local_host_ip}"
      f.puts "public_host_ip:#{Braavos::Service.public_host_ip}"
      f.puts "token:#{token}"
    end
  end

  # Returns the second colon-separated field of the first line of
  # `nodetool info`, with leading whitespace removed. The command is run a
  # second time if the first run raises.
  #
  # NOTE(review): a failing nodetool inside a shell pipeline likely yields
  # empty output rather than an exception, in which case the retry never
  # triggers -- confirm.
  def safely_retrieve_token
    tries ||= 2
    `nodetool info | head -1 | cut -d : -f 2 | sed -e 's/^[ \t]*//'`
  rescue StandardError
    retry unless (tries -= 1).zero?
    raise
  end
end
|