backarch 0.1.15

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 39179cbea17666cbbb5a280fbe00e8cddc62a2c4
+   data.tar.gz: 2f11255acfb638caa374316974e9d62075ba5dab
+ SHA512:
+   metadata.gz: ae56c4ec655a418e0486574aa38fdc51d5a009f7eef74aa5c62cf8fb096c33783256d870d6f7de0db63d1b697f68f64e7ddfb62a76ea9fbbb0a35047f9fc8f48
+   data.tar.gz: 79616e3cf7d6a2e753f4f65e73967ef5799d8b8227b28e329687abc918ad3f91bbfb101a1966feba38e463222aacd8aab35ee2b50350d832e6052a96608c3e98
data/.gitignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in backarch.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2013 Chris DiMartino
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,96 @@
+ # Backarch
+
+ Back up Elasticsearch to AWS S3 and restore it from there.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'backarch'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install backarch
+
+ ## Usage
+
+ Common usage of backarch involves passing the following:
+ 1. configuration file (-c)
+ 2. input (-i)
+ 3. output (-o)
+ 4. command (e.g. elasticsearch)
+ 5. subcommand (e.g. archive)
+
+ ##### Example:
+
+ ```
+ snapshot_archive -c samples/elasticsearch_archive.yaml -i /tmp/elasticsearch_snapshots/full -o /tmp/elasticsearch_snapshots/archive elasticsearch archive
+ ```
+
+ Further examples can be found in the samples/ folder.
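+
+ The file passed with `-c` is plain YAML. A minimal sketch, mirroring
+ samples/elasticsearch_archive.yaml (the key, bucket, and path values here are placeholders):
+
+ ```yaml
+ ---
+ aws:
+   aws_access_key_id: MYACCESSKEY
+   aws_secret_access_key: MYSECRET
+   bucket: backups.example.com
+   path: development/elasticsearch/full/place_directory
+ ```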
+
+ ### Backarch examples (see samples/ folder)
+
+ #### The commands below will back up and then restore Elasticsearch from S3
+
+ 1. ./samples/elasticsearch_snapshot.sh  # Create local snapshot
+ 2. ./samples/elasticsearch_archive.sh   # Push snapshot to s3
+ 3. ./samples/elasticsearch_restore.sh   # Restore snapshot from s3
+
+ #### Backup
+
+ This consists of the following two steps:
+
+ 1. ./samples/elasticsearch_snapshot.sh  # Create local snapshot
+ 2. ./samples/elasticsearch_archive.sh   # Push snapshot to s3
+
+ ##### Snapshot
+
+ ```
+ DATE=`date +%Y%m%d`
+ snapshot_archive -c tmp/elasticsearch_archive.yaml -i /usr/local/var/elasticsearch/ -o /tmp/elasticsearch_snapshots -l debug -d $DATE elasticsearch snapshot
+ ```
+
+ ##### Archive
+
+ ```
+ DATE=`date +%Y%m%d`
+ snapshot_archive -c samples/elasticsearch_archive.yaml -i /tmp/elasticsearch_snapshots/full -o /tmp/elasticsearch_snapshots/archive -l debug -d $DATE elasticsearch archive
+ ```
+
+ #### Restore
+
+ Restoration involves downloading a multi-volume tar file from s3 and then extracting the contents:
+
+ 3. ./samples/elasticsearch_restore.sh
+
+ ##### Shell
+
+ ```
+ DATE=`date +%Y%m%d`
+ snapshot_archive -c tmp/elasticsearch_archive.yaml -i s3://backups.qa.data-axle.infogroup.com/development/elasticsearch/full/place_directory/20140606/nil/20140606 -o /tmp/elasticsearch_restoration -l debug elasticsearch restore
+ ```
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
+ require "bundler/gem_tasks"
data/backarch.gemspec ADDED
@@ -0,0 +1,32 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'backarch/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "backarch"
+   spec.version       = Backarch::VERSION
+   spec.authors       = ["Chris DiMartino"]
+   spec.email         = ["chris.dimartino@infogroup.com"]
+   spec.description   = %q{Backup and archival utility for cassandra/elasticsearch}
+   spec.summary       = %q{Backup and archival to cloud}
+   spec.homepage      = ""
+   spec.license       = ""
+
+   spec.files         = `git ls-files`.split($/)
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.3"
+   spec.add_development_dependency "rake"
+
+   %w{
+     logger
+     slop
+     aws-sdk
+     fork
+     fog
+     gli
+   }.map { |gem| spec.add_dependency gem }
+ end
data/bin/snapshot_archive ADDED
@@ -0,0 +1,4 @@
+ #!/usr/bin/env ruby
+
+ require 'backarch'
+ exit Backarch.run
data/lib/backarch.rb ADDED
@@ -0,0 +1,44 @@
+ require 'logger'
+
+ require 'backarch/archive'
+ require 'backarch/config'
+ require 'backarch/snapshot'
+ require 'backarch/snapshot/cassandra'
+ require 'backarch/snapshot/elasticsearch'
+ require 'backarch/version'
+ require 'backarch/parallel_downloader'
+ require 'backarch/tar_wrapper'
+ require 'backarch/tar_extracter'
+ require 'backarch/elasticsearch_restoration'
+
+ # Adds String#pass, which yields the string to a block; used elsewhere
+ # to configure a freshly created directory in a single expression.
+ class String
+   def pass
+     yield self
+   end
+ end
+
+ module Backarch
+   LOG = Logger.new STDERR
+   LOG.level = Logger::INFO
+
+   # Node identity used in remote paths; falls back to the literal string
+   # 'nil' when the ec2metadata utility is unavailable (e.g. off EC2).
+   NODE_NAME = `ec2metadata --instance-id`.chomp rescue 'nil'
+   ADDRESS   = `ec2metadata --local-ipv4`.chomp rescue 'nil'
+
+   LOG_LEVELS = {
+     'fatal' => ::Logger::FATAL,
+     'error' => ::Logger::ERROR,
+     'warn'  => ::Logger::WARN,
+     'info'  => ::Logger::INFO,
+     'debug' => ::Logger::DEBUG
+   }
+
+   class << self
+     def run
+       Config.init
+     end
+   end
+ end
data/lib/backarch/archive.rb ADDED
@@ -0,0 +1,154 @@
+ require 'fork'
+ require 'fog'
+ require 'aws-sdk'
+ require 'fileutils'
+ require 'tmpdir'
+
+ module Backarch
+   class Archive
+     class << self
+       def run
+         o = new
+         o.create
+         Config.config.each_pair do |provider, params|
+           o.push_archive_to provider, params
+         end
+         o.delete Config.config.keys
+       end
+     end
+
+     def create
+       if local_archive_complete?
+         LOG.info "Archive of snapshot #{archive_name} already completed"
+         return
+       end
+
+       LOG.info "Creating archive of snapshot: #{archive_name}"
+       TarWrapper.create_multiple_volumes(snapshot_folder, archive_folder)
+       local_archive_complete!
+     end
+
+     def delete providers
+       if local_archive_complete? and not providers.select { |provider| remote_archive_complete? provider }.empty? and Dir.exists? archive_folder
+         LOG.info "Removing archive folder #{archive_folder}"
+         FileUtils.rm_rf archive_folder
+         FileUtils.rm_rf snapshot_folder
+       end
+     end
+
+     def push_archive_to provider, archive_info
+       if provider == "google"
+         raise ArgumentError, "Google no longer supported"
+       end
+
+       $0 = "#{Config.program_name} archive pushing archive to #{provider}"
+       if remote_archive_complete? provider
+         LOG.warn "Remote archival of snapshot #{archive_name} already completed"
+         return
+       end
+
+       unless local_archive_complete?
+         LOG.error "Snapshot #{archive_name} not locally present"
+         return
+       end
+
+       begin
+         remote_path = "#{archive_info["path"]}#{remote_archive_path(archive_info)}"
+         start_time = Time.now
+         LOG.info("Beginning archive of #{snapshot_folder} to #{provider}://#{archive_info["bucket"]}/#{remote_path} @ #{Time.now}")
+
+         send("push_to_#{provider}".to_sym, archive_info, remote_path, archive_folder)
+         end_time = Time.now
+         LOG.info("Completed archive of #{snapshot_folder} to #{provider} @ #{end_time} - #{end_time - start_time}s elapsed")
+       rescue Exception => er
+         LOG.error("Failed to push archive #{snapshot_folder} to #{provider} @ #{Time.now}: #{er.message}")
+         er.backtrace.each { |line| LOG.error line }
+       end
+     end
+
+     private
+
+     def snapshot_folder
+       File.join Config.input, Config.date
+     end
+
+     def archive_folder
+       File.join Config.output, Config.date
+     end
+
+     def archive_name
+       "#{Config.date}"
+     end
+
+     def local_archive_complete?
+       File.exists? local_archive_complete_file
+     end
+
+     def local_archive_complete!
+       ::FileUtils.touch local_archive_complete_file
+     end
+
+     def local_archive_complete_file
+       "#{Config.output}/.#{archive_name}.local.done"
+     end
+
+     def remote_archive_complete? provider
+       File.exists? remote_archive_complete_file_for(provider)
+     end
+
+     def remote_archive_complete! provider
+       # NOTE: remote_archive_in_progress_file_for is not defined anywhere in
+       # this class; the rescue modifier silently swallows the NoMethodError.
+       ::FileUtils.rm remote_archive_in_progress_file_for(provider) rescue nil
+       ::FileUtils.touch remote_archive_complete_file_for(provider)
+     end
+
+     def remote_archive_complete_file_for provider
+       "#{Config.output}/.#{archive_name}.remote.#{provider}.done"
+     end
+
+     # Receives the provider's config hash: the first argument passed through
+     # push_archive_to's send is archive_info, not the provider name.
+     def push_to_aws(archive_info, path, folder)
+       bucket_name = archive_info["bucket"]
+       if remote_archive_complete? "aws"
+         LOG.info "Remote archive has already been completed."
+         return
+       end
+
+       s3 = AWS::S3.new
+       s3_bucket = s3.buckets[bucket_name]
+       raise "Bucket does not exist" unless s3_bucket.exists?
+
+       command = "s3cmd put -r -v --multipart-chunk-size-mb=512 #{folder} s3://#{bucket_name}/#{path}"
+       LOG.info "Invoking: #{command}"
+       LOG.debug `#{command}`
+
+       if $?.to_i == 0
+         push_completed_marker_to_aws(bucket_name, path)
+         remote_archive_complete! "aws"
+       else
+         raise StandardError, "Unable to push archive to aws"
+       end
+     end
+
+     def remote_archive_path opts
+       # The config hashes use string keys, so opts[:path] is typically nil
+       # here and this reduces to "/<date>/<node>/".
+       "/%s%s/%s/" % [
+         opts[:path],
+         Config.date,
+         NODE_NAME
+       ]
+     end
+
+     def push_completed_marker_to_aws(bucket, path)
+       Dir.mktmpdir do |dir|
+         files = FileUtils.touch(File.join(dir, "_COMPLETED"))
+         command = "s3cmd put -v #{files.first} s3://#{bucket}/#{File.join(path, "_COMPLETED")}"
+         LOG.info "Pushing completed marker: #{command}"
+         raise StandardError, "Unable to push _COMPLETED marker" unless system(command)
+       end
+     end
+   end
+ end
data/lib/backarch/config.rb ADDED
@@ -0,0 +1,152 @@
+ require 'date'
+ require 'yaml'
+ require 'gli'
+
+ module Backarch
+   class Config
+     class << self
+       include GLI::App
+
+       attr_accessor :program_name
+       attr_accessor :target_app
+       attr_accessor :opts
+
+       DATE_FORMAT = '%Y%m%d'
+
+       def init
+         @program_name = $0
+
+         program_desc 'Snapshot and archival suite for Cassandra and ElasticSearch'
+         version Backarch::VERSION
+
+         desc 'The Cassandra, ElasticSearch, or tar data directory'
+         flag :i, :input
+
+         desc "Configuration file location (default: /etc/chef/elasticsearch_archive.yaml)"
+         flag :c, :config
+
+         desc "The date for the archive (defaults to today)"
+         flag :d, :date
+
+         desc "Log level (info, debug...)"
+         default_value 'info'
+         flag :l, :log_level
+
+         desc "The snapshot or tar output location"
+         flag :o, :output
+
+         desc "Prior snapshot directory to hard link to with rsync"
+         flag :p, :link_prior
+
+         desc 'Disable compression of archive'
+         switch :Z, :no_compress
+
+         desc 'Full path to the nodetool binary'
+         flag(:n, :nodetool)
+
+         desc 'The version of the cassandra application (required for pre-1.0 versions)'
+         flag(:V, :cassandra_version)
+
+         command(:cassandra) do |cmd|
+           cmd.action do |global_opts, opts, args|
+             @target_app = :cassandra
+             init_aws
+
+             case args.first
+             when 'snapshot'
+               Snapshot::Cassandra.run
+             when 'archive'
+               Archive.run
+             end
+           end
+         end
+
+         command(:elasticsearch) do |cmd|
+           cmd.action do |global_opts, opts, args|
+             @target_app = :elasticsearch
+             init_aws
+
+             case args.first
+             when 'snapshot'
+               Snapshot::Elasticsearch.run
+             when 'archive'
+               Archive.run
+             when 'restore'
+               ElasticsearchRestoration.run
+             end
+           end
+         end
+
+         command(:tar) do |cmd|
+           cmd.action do |global_opts, opts, args|
+             @target_app = :tar
+
+             case args.first
+             when 'extract'
+               TarExtracter.run
+             end
+           end
+         end
+
+         pre do |global_opts, command, opts, args|
+           @opts = global_opts.merge(opts)
+           LOG.level = LOG_LEVELS[@opts[:log_level]]
+         end
+
+         run(ARGV)
+       end
+
+       def init_aws
+         AWS.config \
+           :access_key_id     => config['aws']["aws_access_key_id"],
+           :secret_access_key => config['aws']["aws_secret_access_key"],
+           :ssl_verify_peer   => false,
+           :http_open_timeout => 2,
+           :http_read_timeout => 2,
+           :http_idle_timeout => 5
+       end
+
+       def compress
+         @opts[:no_compress] ? false : true
+       end
+
+       def config
+         @opts[:config] ||= case @target_app
+                            when :elasticsearch then '/etc/chef/elasticsearch_archive.yaml'
+                            when :cassandra then '/etc/chef/cassandra_archive.yaml'
+                            end
+         YAML.load(File.read(@opts[:config]))
+       end
+
+       def date
+         @opts[:date] ||= Date.today.strftime('%Y%m%d')
+       end
+
+       def cassandra_version
+         @opts[:cassandra_version]
+       end
+
+       def output
+         @opts[:output].gsub(%r{/\Z}, '') if @opts[:output]
+       end
+
+       def input
+         @opts[:input].gsub(%r{/\Z}, '') if @opts[:input]
+       end
+
+       def nodetool
+         @opts[:nodetool] ||= '/usr/lib/cassandra/bin/nodetool'
+       end
+
+       # Picks the most recent snapshot directory from the prior 30 days to
+       # use as an rsync hard-link base.
+       def link_prior
+         @opts[:link_prior] ||= (1..30).map { |i| snapshot_dir(i * -1) }.select { |dir| Dir.exists? dir }.first
+       end
+
+       def snapshot_dir offset=0
+         "#{Config.output}/full/#{(Date.strptime(date, DATE_FORMAT) + offset).strftime(DATE_FORMAT)}"
+       end
+     end
+   end
+ end
data/lib/backarch/elasticsearch_restoration.rb ADDED
@@ -0,0 +1,41 @@
+ require 'tmpdir'
+
+ module Backarch
+   class ElasticsearchRestoration
+     def self.run
+       instance = new
+       instance.run
+     end
+
+     def run
+       download
+       devolume
+       LOG.info "Restoration completed. Contents in #{output}"
+     end
+
+     private
+
+     def download
+       ParallelDownloader.download_folder input, tmpdir
+     end
+
+     def devolume
+       TarWrapper.restore_from_volumes tmpdir, output
+     end
+
+     def input
+       raise ArgumentError, "Requires input parameter to be passed." unless Config.input
+       raise ArgumentError, "Input must be an s3 path (eg: s3://bucket)" unless Config.input.start_with? "s3://"
+       Config.input
+     end
+
+     def tmpdir
+       @tmpdir ||= Dir.mktmpdir
+     end
+
+     def output
+       raise ArgumentError, "Requires output parameter to be passed." unless Config.output
+       Config.output
+     end
+   end
+ end
data/lib/backarch/parallel_downloader.rb ADDED
@@ -0,0 +1,70 @@
+ require 'tempfile'
+ require 'uri'
+
+ module Backarch
+   class ParallelDownloader
+     attr_reader :s3_inputs, :destination, :outputs
+
+     def self.download_folder(s3_path, destination)
+       s3_inputs = retrieve_volumes_from_s3(s3_path)
+       instance = new destination, s3_inputs
+       instance.download
+     end
+
+     def initialize(destination, s3_inputs)
+       validate_parallel_installed
+
+       @s3_inputs = s3_inputs
+       @destination = destination
+       @outputs = generate_output_files(destination, s3_inputs)
+     end
+
+     # Writes one s3cmd get command per volume to a temp file and hands it
+     # to GNU parallel; failed jobs are retried once via --resume-failed.
+     def download
+       LOG.info "Downloading: #{s3_inputs}"
+       tuples = s3_inputs.zip(outputs)
+       file = Tempfile.new('parallel_input')
+       tuples.each do |pair|
+         file.puts "s3cmd get -v #{pair[0]} #{pair[1]}"
+       end
+       file.close
+
+       command = "parallel --no-notice -j 70% --joblog #{joblog_path} -a #{file.path}"
+       LOG.info "Running in parallel: #{command}"
+       if !system(command)
+         LOG.info "Parallel downloads failed. Retrying."
+         raise StandardError, "Failed to complete parallel download" unless system(command + " --resume-failed")
+       end
+     ensure
+       file.unlink
+     end
+
+     private
+
+     def self.retrieve_volumes_from_s3(full_path)
+       s3 = AWS::S3.new
+       bucket_name = Config.config["aws"]["bucket"]
+       bucket = s3.buckets[bucket_name]
+       path = URI.parse(full_path).path[1..-1]
+       files = bucket.objects.with_prefix(File.join(path, "volume")).map(&:key)
+       files.map { |file| File.join("s3://", bucket_name, file) }
+     end
+
+     def validate_parallel_installed
+       `parallel --version`
+     rescue StandardError
+       raise ArgumentError, "GNU parallel not available"
+     end
+
+     def joblog_path
+       "/tmp/backarch_parallel_joblog.txt"
+     end
+
+     def generate_output_files(destination, s3_inputs)
+       s3_inputs.map do |s3_input|
+         File.join(destination, File.basename(s3_input))
+       end
+     end
+   end
+ end
data/lib/backarch/snapshot.rb ADDED
@@ -0,0 +1,23 @@
+ module Backarch
+   class Snapshot
+     def snapshot_name
+       "full_#{Config.date}"
+     end
+
+     class << self
+       def run
+         o = self.new
+         begin
+           o.request
+           o.sync
+         ensure
+           o.delete
+         end
+       end
+     end
+
+     def snapshot_destination
+       Config.snapshot_dir
+     end
+   end
+ end
data/lib/backarch/snapshot/cassandra.rb ADDED
@@ -0,0 +1,50 @@
+ require 'fileutils'
+
+ module Backarch
+   class Snapshot
+     class Cassandra < Snapshot
+       def request
+         LOG.info "Requesting snapshot #{snapshot_name}"
+         # Create a new snapshot in cassandra with the requested name
+         case Config.cassandra_version
+         when /\A0\.6\./
+           `#{Config.nodetool} snapshot #{snapshot_name}`
+           `find #{Config.input} -type f -wholename '*/backups/*' -exec rm {} \\; 2>/dev/null`
+           ## Going to move the directory it created to what we expect it to be now.
+           `for snapshot in \`find #{Config.input} -type d -wholename '*/snapshots/*-#{snapshot_name}'\`; do basedir=\`dirname $snapshot\`; rm -rfv $basedir/#{snapshot_name}; mv $snapshot $basedir/#{snapshot_name}; done`
+         else
+           `#{Config.nodetool} snapshot -t #{snapshot_name} 2>/dev/null`
+           `find #{Config.input} -type f -wholename '*/backups/*' -exec rm {} \\;`
+         end
+       end
+
+       def sync
+         $0 = "#{Config.program_name} snapshotter syncing #{snapshot_name}"
+         LOG.info 'Ready to sync snapshot'
+         snapshot_dirs = `find #{Config.input} -type d -wholename '*/snapshots/#{snapshot_name}'`.split(/\n/)
+         snapshot_dirs.map do |dir|
+           dest_dir = dir[%r{/data/(.+)/snapshots/}, 1]
+           prior_dir = Config.link_prior
+           link_prior = prior_dir ? "--link-dest=#{prior_dir}/#{dest_dir}" : ''
+           command = "rsync -ar --stats #{link_prior} #{dir}/ #{snapshot_destination}/#{dest_dir}"
+           LOG.debug command
+
+           FileUtils.mkdir_p "#{snapshot_destination}/#{dest_dir}"
+           FileUtils.chmod 0700, "#{snapshot_destination}/#{dest_dir}"
+           FileUtils.chown 'root', 'root', "#{snapshot_destination}/#{dest_dir}"
+           `#{command}`
+         end
+       end
+
+       def delete
+         LOG.info("Removing snapshot #{snapshot_name}")
+         case Config.cassandra_version
+         when /\A0\.6\./
+           `find #{Config.input} -type d -wholename '*/snapshots/#{snapshot_name}' -exec rm -rf {} \\;`
+         else
+           `#{Config.nodetool} clearsnapshot -t #{snapshot_name}`
+         end
+       end
+     end
+   end
+ end
data/lib/backarch/snapshot/elasticsearch.rb ADDED
@@ -0,0 +1,99 @@
+ require 'fileutils'
+
+ module Backarch
+   class Snapshot
+     class Elasticsearch < Snapshot
+       def request
+         snapshot_destination.pass do |dir|
+           ::FileUtils.mkdir_p dir
+           ::FileUtils.chmod 0700, dir
+           ::FileUtils.chown 'root', 'root', dir
+         end
+
+         depth = Config.input.split('/').count
+
+         File.open("#{snapshot_destination}/cluster_state.local.json", "w") { |f| f.write `curl -s -XGET "http://localhost:9200/_cluster/state?pretty=true&local=true"` }
+         File.open("#{snapshot_destination}/cluster_state.global.json", "w") { |f| f.write `curl -s -XGET "http://localhost:9200/_cluster/state?pretty=true"` }
+
+         # now let's tar up our data files. these are huge, so let's be nice
+         indexes = `find #{Config.input} -mindepth #{depth} -maxdepth #{depth} -type d -wholename '*/indices/*' | sort`.split
+         indexes.map do |idx|
+           index_name = idx.split('/').last
+
+           LOG.info("Beginning snapshot of index #{index_name}")
+
+           es_index_flush(index_name) do
+             settings = `curl -s -XGET "http://localhost:9200/#{index_name}/_settings?pretty=true" 2>/dev/null | sed '1,2d' | sed '$d' | sed '$d'`.chomp
+             mappings = `curl -s -XGET "http://localhost:9200/#{index_name}/_mapping?pretty=true" 2>/dev/null | sed '1,2d' | sed '$d' | sed '$d'`.chomp
+
+             # time to create our restore script! oh god scripts creating scripts, this never ends well…
+             File.open("#{snapshot_destination}/#{index_name}.restore.sh", 'w') { |file| file.write(<<EOF) }
+ #!/bin/bash
+ # this script requires #{index_name}.tar.gz and will restore it into elasticsearch
+ # it is ESSENTIAL that the index you are restoring does NOT exist in ES. delete it
+ # if it does BEFORE trying to restore data.
+
+ curl -s -XPUT 'http://localhost:9200/#{index_name}' -d '{#{settings}},"mappings":{#{mappings}}'
+ echo
+
+ # extract our data files into place
+ echo "Restoring index (this may take a while)..."
+ tar xvf #{index_name}.tar.gz -C #{Config.input} --strip-components=#{depth}
+ echo
+ EOF
+
+             command = "nice -n 19 tar czf #{snapshot_destination}/#{index_name}.tar.gz -C #{Config.input} #{idx}"
+             LOG.info("Creating archive: #{command}")
+             `#{command}`
+           end
+         end
+       end
+
+       def sync
+         # NOOP
+       end
+
+       def delete
+         # NOOP
+       end
+
+       def es_index_flush index
+         begin
+           AWS::SimpleDB.consistent_reads do
+             begin
+               sdb = AWS::SimpleDB.new
+               domain = sdb.domains.create(Config.config["aws"]["path"].gsub('/','-'))
+               lock = domain.items["#{index}-lock"]
+
+               LOG.info("Current lock before flush: #{lock.attributes['owner'].values}")
+               unless lock.attributes['owner'].values.count > 0 ## Lock already taken
+                 LOG.info "Flushing index #{index}"
+                 `curl -s -XPOST "http://localhost:9200/#{index}/_flush"`
+
+                 LOG.info "Disabling flush of transaction logs for #{index}"
+                 `curl -s -XPUT "http://localhost:9200/#{index}/_settings?pretty=true" -d '{"index":{"translog":{"disable_flush":"true"}}}'`
+               end
+               lock.attributes['owner'].add(NODE_NAME)
+               LOG.info("Current lock after flush: #{lock.attributes['owner'].values}")
+
+               yield
+             ensure
+               lock.attributes['owner'].delete(NODE_NAME) rescue nil
+               owners = lock.attributes['owner'].values rescue []
+               LOG.info("Current lock before re-enabling flush: #{owners}")
+
+               unless owners.count > 0 ## Lock still in use. Don't enable flush yet
+                 LOG.info "Enabling flush of transaction logs for #{index}"
+                 `curl -s -XPUT "http://localhost:9200/#{index}/_settings?pretty=true" -d '{"index":{"translog":{"disable_flush":"false"}}}'`
+                 lock.delete if lock
+               end
+             end
+           end
+         rescue AWS::SimpleDB::Errors::ServiceUnavailable
+           LOG.info "Caught AWS::SimpleDB::Errors::ServiceUnavailable error. Retrying in 5 seconds"
+           retry
+         end
+       end
+     end
+   end
+ end
data/lib/backarch/tar_extracter.rb ADDED
@@ -0,0 +1,27 @@
+ require 'backarch/tar_wrapper'
+
+ module Backarch
+   class TarExtracter
+     def self.run
+       instance = new
+       instance.run
+     end
+
+     def run
+       LOG.info "Restoring volumes in #{input} to #{output}"
+       TarWrapper.restore_from_volumes(input, output)
+     end
+
+     private
+
+     def input
+       raise ArgumentError, "Requires input parameter to be passed." unless Config.input
+       Config.input
+     end
+
+     def output
+       raise ArgumentError, "Requires output parameter to be passed." unless Config.output
+       Config.output
+     end
+   end
+ end
data/lib/backarch/tar_wrapper.rb ADDED
@@ -0,0 +1,69 @@
+ require 'fileutils'
+
+ module Backarch
+   class TarWrapper
+     #BLOCKSIZE = 10737418 # 10MB
+     BLOCKSIZE = 1_073_741_824 # 1GB
+
+     class << self
+       # Invokes GNU tar to generate a multi-volume tar for parallel upload and download.
+       # eg: gtar -cML 30720 -f file1.tar -f file2.tar -C /tmp/elasticsearch_snapshots/full 20140605
+       def create_multiple_volumes(origin, destination)
+         FileUtils.mkdir_p destination
+
+         tar = validate_gnu_tar_installed
+         output_files = generate_output_files(origin, destination)
+         tape_limit = BLOCKSIZE / 1024
+         working_directory = File.dirname origin
+         basename = File.basename origin
+
+         command = "#{tar} -cML #{tape_limit} -f #{output_files.join(" -f ")} -C #{working_directory} #{basename}"
+         LOG.info "Creating tar volumes: #{command}"
+         raise StandardError, "Failed to create tar volumes" unless system(command)
+       end
+
+       def restore_from_volumes(origin, destination)
+         FileUtils.mkdir_p destination
+
+         tar = validate_gnu_tar_installed
+         input_files = generate_input_files(origin)
+         command = "#{tar} -xMf #{input_files.join(" -f ")} -C #{destination}"
+         LOG.info "Restoring from tar volumes: #{command}"
+         raise StandardError, "Failed to restore from tar volumes" unless system(command)
+       end
+
+       private
+
+       def generate_input_files(folder)
+         Dir.glob(File.join(folder, "**", "volume*.tar"))
+       end
+
+       def generate_output_files(origin, destination)
+         volumes = number_of_volumes(origin)
+         raise StandardError, "Incompatible number of volumes detected from folder. Does snapshot folder #{origin} exist?" unless volumes > 0
+
+         files = []
+         volumes.times { |n| files << File.join(destination, "volume#{n}.tar") }
+         files
+       end
+
+       def number_of_volumes(folder)
+         command = "BLOCKSIZE=#{BLOCKSIZE} du -d 1 #{folder} | cut -f 1"
+         `#{command}`.to_i
+       end
+
+       def validate_gnu_tar_installed
+         raise StandardError unless `tar --version`.include?("GNU")
+         "tar"
+       rescue StandardError
+         begin
+           `gtar --version`
+         rescue StandardError
+           raise ArgumentError, "GNU tar is not available. This differs from OS X's tar."
+         else
+           "gtar"
+         end
+       end
+     end
+   end
+ end
data/lib/backarch/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Backarch
+   VERSION = "0.1.15"
+ end
data/samples/elasticsearch_archive.sh ADDED
@@ -0,0 +1,3 @@
+ DATE=`date +%Y%m%d`
+ ruby -I.:lib ./bin/snapshot_archive -c samples/elasticsearch_archive.yaml -i /tmp/elasticsearch_snapshots/full -o /tmp/elasticsearch_snapshots/archive -l debug -d $DATE elasticsearch archive
+
data/samples/elasticsearch_archive.yaml ADDED
@@ -0,0 +1,6 @@
+ ---
+ aws:
+   aws_access_key_id: MYACCESSKEY
+   aws_secret_access_key: MYSECRET
+   bucket: backups.qa.data-axle.infogroup.com
+   path: development/elasticsearch/full/place_directory
data/samples/elasticsearch_devolume.sh ADDED
@@ -0,0 +1,3 @@
+ DATE=`date +%Y%m%d`
+ ruby -I.:lib ./bin/snapshot_archive -i /tmp/elasticsearch_snapshots/archive/$DATE -o /tmp/elasticsearch_snapshots/devolume tar extract
+
data/samples/elasticsearch_restore.sh ADDED
@@ -0,0 +1,3 @@
+ DATE=`date +%Y%m%d`
+ ruby -I.:lib ./bin/snapshot_archive -c tmp/elasticsearch_archive.yaml -i s3://backups.qa.data-axle.infogroup.com/development/elasticsearch/full/place_directory/20140606/nil/20140606 -o /tmp/elasticsearch_restoration -l debug elasticsearch restore
+
data/samples/elasticsearch_snapshot.sh ADDED
@@ -0,0 +1,3 @@
+ #!/bin/bash
+ DATE=`date +%Y%m%d`
+ ruby -I.:lib ./bin/snapshot_archive -c tmp/elasticsearch_archive.yaml -i /usr/local/var/elasticsearch/ -o /tmp/elasticsearch_snapshots -l debug -d $DATE elasticsearch snapshot
metadata ADDED
@@ -0,0 +1,180 @@
+ --- !ruby/object:Gem::Specification
+ name: backarch
+ version: !ruby/object:Gem::Version
+   version: 0.1.15
+ platform: ruby
+ authors:
+ - Chris DiMartino
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-06-12 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: logger
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: slop
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: aws-sdk
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: fork
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: fog
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: gli
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Backup and archival utility for cassandra/elasticsearch
+ email:
+ - chris.dimartino@infogroup.com
+ executables:
+ - snapshot_archive
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - backarch.gemspec
+ - bin/snapshot_archive
+ - lib/backarch.rb
+ - lib/backarch/archive.rb
+ - lib/backarch/config.rb
+ - lib/backarch/elasticsearch_restoration.rb
+ - lib/backarch/parallel_downloader.rb
+ - lib/backarch/snapshot.rb
+ - lib/backarch/snapshot/cassandra.rb
+ - lib/backarch/snapshot/elasticsearch.rb
+ - lib/backarch/tar_extracter.rb
+ - lib/backarch/tar_wrapper.rb
+ - lib/backarch/version.rb
+ - samples/elasticsearch_archive.sh
+ - samples/elasticsearch_archive.yaml
+ - samples/elasticsearch_devolume.sh
+ - samples/elasticsearch_restore.sh
+ - samples/elasticsearch_snapshot.sh
+ homepage: ''
+ licenses:
+ - ''
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.1
+ signing_key:
+ specification_version: 4
+ summary: Backup and archival to cloud
+ test_files: []