cassback 0.1.1
- checksums.yaml +7 -0
- data/bin/cassback.rb +168 -0
- data/lib/backuptool.rb +201 -0
- data/lib/cassandra.rb +153 -0
- data/lib/hadoop.rb +14 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 619f294e60950a9f4122ebd6879c7e51a0e524d2
  data.tar.gz: f1e827c821dd8301ce03276ee5e87ca2c11e8092
SHA512:
  metadata.gz: 6ef3cbf11aeca4fb97dde85241807db0c159b475c09c7cd41f75464484dbe22566590df0e4f48a540096607b14fe8a2fe8dca269b43629144f6b5bf8c8130a5b
  data.tar.gz: 29ece32e3a289a3240822c9b27f3e7bd0d7fda87cf91838221343448d2056ff872f8ab056169624dde35261d153fb30aee0327f02746febd1bf06cd0e3fd3154
data/bin/cassback.rb
ADDED
@@ -0,0 +1,168 @@
#!/usr/bin/env ruby
require 'logger'
require 'optparse'
require 'yaml'

require_relative '../lib/hadoop.rb'
require_relative '../lib/cassandra.rb'
require_relative '../lib/backuptool.rb'

# This allows merging hashes that can themselves contain hashes (deep merge).
class ::Hash
  def deep_merge!(second)
    merger = proc { |_key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : Array === v1 && Array === v2 ? v1 | v2 : [:undefined, nil, :nil].include?(v2) ? v1 : v2 }
    merge!(second.to_h, &merger)
  end
end

# Create a Ruby logger with time/size rotation that logs both to file and console.
two_mb = 2 * 1024 * 1024
logger = Logger.new('| tee cassback.log', 'weekly', two_mb)

# Default action
action = nil

# Default config file
config_file = ''

# Default command line config (nested keys pre-created so the option callbacks below can assign into them)
command_line_config = {
  'cassandra' => {},
  'hadoop' => {},
}

# Default options
options = {
  'cassandra' => {
    'config' => '/etc/cassandra/conf/cassandra.yaml',
  },
  'hadoop' => {
    'hostname' => 'localhost',
    'port' => 14_000,
    'directory' => 'cassandra',
  },
  'restore' => {
    'destination' => 'cassandra',
  },
}

# If no argument is given on the command line, print the help
ARGV << '-h' if ARGV.empty?

# Parse command line options
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: cassback.rb [options]'

  opts.separator ''
  opts.separator 'Configuration:'
  opts.on('-C', '--config CONFIGFILE', 'Configuration file for the application') do |v|
    config_file = v
  end

  opts.separator ''
  opts.separator 'Actions:'
  opts.on('-S', '--snapshot', 'creates a new snapshot and sends it to Hadoop') do |_v|
    action = 'new'
  end
  opts.on('-R', '--restore', 'restores a snapshot from Hadoop, needs a date and a destination') do |_v|
    action = 'restore'
  end
  opts.on('-L', '--list', 'list snapshots on Hadoop') do |_v|
    action = 'list'
  end
  opts.on('-F', '--flush', 'removes a backed-up snapshot from Hadoop, needs a date') do |_v|
    action = 'delete'
  end

  opts.separator ''
  opts.separator 'Action related:'
  opts.on('-n', '--node NODE', 'Cassandra server node (default is current host)') do |v|
    options['node'] = v
  end
  opts.on('-d', '--date DATE', 'snapshot date, like YYYY_MM_DD') do |v|
    options['date'] = v
  end
  opts.on('-t', '--destination DIR', 'local destination path for restore (default is cassandra)') do |v|
    options['restore']['destination'] = v
  end

  opts.separator ''
  opts.separator 'Hadoop (WebHDFS):'
  opts.on('-H', '--host HOSTNAME', 'Hostname (default is localhost)') do |v|
    command_line_config['hadoop']['hostname'] = v
  end
  opts.on('-P', '--port PORT', 'Port (default is 14000)') do |v|
    command_line_config['hadoop']['port'] = v
  end
  opts.on('-D', '--directory DIRECTORY', 'Directory where to store backups (default is cassandra)') do |v|
    command_line_config['hadoop']['directory'] = v
  end

  opts.separator ''
  opts.separator 'Cassandra:'
  # Short flag changed to -c to avoid clashing with -F (--flush) above.
  opts.on('-c', '--cassandra CONFIGFILE', 'Cassandra configuration file (default is /etc/cassandra/conf/cassandra.yaml)') do |v|
    command_line_config['cassandra']['config'] = v
  end

  opts.separator ''
  opts.separator 'Help:'
  opts.on('-h', '--help', 'Displays Help') do
    puts opts
    exit
  end
end
parser.parse!

# Read the configuration file if it exists
begin
  options.deep_merge!(YAML.load_file(config_file))
  logger.info("Using configuration file #{config_file}")
rescue
  logger.warn('Unable to read configuration file, continue with default settings')
ensure
  # Merge with command line settings.
  options.deep_merge!(command_line_config)
end

# Fail if no action specified
if action.nil?
  logger.error('No action given')
  exit(1)
end

begin
  # Create the Hadoop object
  hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'], base_dir: options['hadoop']['directory'])

  # Create the Cassandra object
  cassandra = Cassandra.new(options['cassandra']['config'], logger)

  # Create the backup object
  bck = BackupTool.new(cassandra, hadoop, logger)

  # If no node specified, use the local node
  options['node'] = cassandra.node_name unless options.include? 'node'

  # New snapshot
  if action == 'new'
    bck.new_snapshot

  # Restore a snapshot
  elsif action == 'restore'
    raise('No date given') unless options.include? 'date'
    bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'])

  # List snapshots
  elsif action == 'list'
    bck.list_snapshots(node: options['node'])

  # Delete a snapshot
  elsif action == 'delete'
    raise('No date given') unless options.include? 'date'
    bck.delete_snapshots(node: options['node'], date: options['date'])
  end

# In case of failure
rescue Exception => e
  logger.error(e.message)
  exit(1)
end

exit(0)
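The script loads an optional application config file given with -C, deep-merges it over the defaults, then applies command-line overrides on top. A minimal sketch of such a file, mirroring the keys of the options hash above (the gateway hostname and restore path are illustrative placeholders, not values shipped with the gem):

# Hypothetical cassback configuration file, passed with: cassback.rb -C config.yml -S
cassandra:
  config: /etc/cassandra/conf/cassandra.yaml
hadoop:
  hostname: hdfs-gateway.example.org   # placeholder WebHDFS/HttpFS gateway
  port: 14000
  directory: cassandra                 # base directory on HDFS
restore:
  destination: /var/lib/cassandra/restore   # placeholder local restore path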
data/lib/backuptool.rb
ADDED
@@ -0,0 +1,201 @@
require 'fileutils'
require 'set'
require 'table_print'

# Buffer size, used for downloads
BUFFER_SIZE = 10_000_000

# Directory where metadata is
META_DIR = 'cass_snap_metadata'.freeze

class BackupTool
  # Create a new BackupTool instance
  # * *Args* :
  #   - +cassandra+ -> Cassandra instance
  #   - +hadoop+ -> HDFS instance
  #   - +logger+ -> Logger
  def initialize(cassandra, hadoop, logger)
    @cassandra = cassandra
    @hadoop = hadoop
    @logger = logger

    @metadir = META_DIR
  end

  # Look for snapshots
  # * *Args* :
  #   - +node+ -> Cassandra node name
  #   - +date+ -> snapshot date
  def search_snapshots(node: 'ALL', date: 'ALL')
    result = []

    def get_snapshot_metadata(node, date)
      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
      return @hadoop.read(remote).split("\n").to_set
    rescue Exception => e
      raise("Could not read metadata : #{e.message}")
    end

    def get_snapshots_node(node, date)
      result = []
      begin
        if date == 'ALL'
          ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
          ls.each do |item|
            date = item['pathSuffix'].gsub('cass_snap_', '')
            metadata = get_snapshot_metadata(node, date)
            snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
            result.push(snapshot)
          end
        else
          metadata = get_snapshot_metadata(node, date)
          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
          result.push(snapshot)
        end
      rescue Exception => e
        @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
      end
      result
    end

    if node == 'ALL'
      begin
        ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
        ls.each do |item|
          n = item['pathSuffix']
          result += get_snapshots_node(n, date)
        end
      rescue Exception => e
        @logger.warn("Could not get snapshots for cluster #{@cassandra.cluster_name} : #{e.message}")
      end
    else
      result = get_snapshots_node(node, date)
    end

    result.sort
  end

  def list_snapshots(node: @cassandra.node_name)
    @logger.info('Listing available snapshots')
    snapshots = search_snapshots(node: node)
    tp(snapshots, 'cluster', 'node', 'date')
  end

  def new_snapshot
    @logger.info('Starting a new snapshot')
    snapshot = @cassandra.new_snapshot

    existing = search_snapshots(node: snapshot.node)
    last = if existing.empty?
             CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
           else
             existing[-1]
           end

    @logger.info('Uploading tables to Hadoop')
    files = snapshot.metadata - last.metadata
    @logger.info("#{files.length} files to upload")
    files.each do |file|
      @logger.info("Sending file #{file} to Hadoop")
      local = @cassandra.data_path + '/' + file
      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
      @logger.debug("#{local} => #{remote}")
      f = File.open(local, 'r')
      @hadoop.create(remote, f, overwrite: true)
      f.close
    end

    @logger.info('Sending metadata to Hadoop')
    remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
    @logger.debug("metadata => #{remote}")
    @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)

    @cassandra.delete_snapshot(snapshot)
    @logger.info('Success !')
  end

  def delete_snapshots(node: @cassandra.node_name, date: 'ALL')
    snapshots = search_snapshots(node: node, date: date)
    if snapshots.empty?
      raise('No snapshot found for deletion')
    else
      snapshots.each do |snapshot|
        @logger.info("Deleting snapshot #{snapshot}")
        node_snapshots = search_snapshots(node: snapshot.node)
        merged_metadata = Set.new
        node_snapshots.each do |s|
          merged_metadata += s.metadata if s != snapshot
        end
        files = snapshot.metadata - merged_metadata
        @logger.info("#{files.length} files to delete")
        files.each do |file|
          @logger.info("Deleting file #{file}")
          remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
          @logger.debug("DELETE => #{remote}")
          @hadoop.delete(remote)
        end
        @logger.info('Deleting metadata in Hadoop')
        remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
        @logger.debug("DELETE => #{remote}")
        @hadoop.delete(remote)
      end
    end
  end

  # Download a file from HDFS, buffered way
  # * *Args* :
  #   - +remote+ -> HDFS path
  #   - +local+ -> local path
  def buffered_download(remote, local)
    @logger.debug("#{remote} => #{local}")

    # Create the destination directory if it does not exist
    path = File.dirname(local)
    FileUtils.mkdir_p(path) unless File.exist?(path)

    file = open(local, 'wb')

    offset = 0
    length = BUFFER_SIZE
    print '['
    while length == BUFFER_SIZE
      print '#'
      content = @hadoop.read(remote, offset: offset, length: BUFFER_SIZE)
      file.write(content)
      length = content.length
      offset += length
    end
    print "]\n"

    file.close
  end

  # Restore a snapshot from HDFS
  # * *Args* :
  #   - +node+ -> node where the snapshot comes from
  #   - +date+ -> snapshot date
  #   - +destination+ -> local directory where to restore
  def restore_snapshot(node, date, destination)
    # Search the snapshot matching node and date
    snapshots = search_snapshots(node: node, date: date)

    if snapshots.empty?
      raise('No snapshot found for restore')
    elsif snapshots.length > 1
      raise('More than one candidate snapshot to restore')
    else
      snapshot = snapshots[0]
      @logger.info("Restoring snapshot #{snapshot}")
      @logger.info("#{snapshot.metadata.length} files to restore")

      # For each file in metadata
      snapshot.metadata.each do |file|
        @logger.info("Restoring file #{file}")
        local = destination + '/' + file
        remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
        # Download the file from hdfs
        buffered_download(remote, local)
      end
      @logger.info('Success !')
    end
  end
end
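For orientation, bin/cassback.rb wires these classes together roughly as follows. This is a minimal sketch, assuming a reachable WebHDFS/HttpFS gateway and nodetool on the PATH; the hostname is a placeholder.

require 'logger'
require_relative 'hadoop'
require_relative 'cassandra'
require_relative 'backuptool'

logger = Logger.new(STDOUT)

# Placeholder gateway; in the CLI these values come from the merged options hash.
hadoop    = Hadoop.new(host: 'hdfs-gateway.example.org', port: 14_000, base_dir: 'cassandra')
cassandra = Cassandra.new('/etc/cassandra/conf/cassandra.yaml', logger)
tool      = BackupTool.new(cassandra, hadoop, logger)

tool.new_snapshot                               # snapshot, then upload only files missing from the previous one
tool.list_snapshots(node: cassandra.node_name)  # print a table of available snapshots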
data/lib/cassandra.rb
ADDED
@@ -0,0 +1,153 @@
require 'set'
require 'socket'
require 'yaml'

class Cassandra
  attr_reader :data_path, :cluster_name, :node_name

  def initialize(config_file, logger)
    @logger = logger

    read_config_file(config_file)

    @node_name = Socket.gethostname

    @logger.info("Cassandra cluster name = #{@cluster_name}")
    @logger.info("Cassandra node name = #{@node_name}")
    @logger.info("Cassandra data path = #{@data_path}")
  end

  def read_config_file(config_file)
    config = YAML.load_file(config_file)
    if config.include? 'cluster_name'
      @cluster_name = config['cluster_name'].tr(' ', '_')
    else
      @logger.warn("Could not find cluster name in Cassandra config file #{config_file}")
      @cluster_name = 'noname_cassandra_cluster'
    end
    if config.include? 'data_file_directories'
      if config['data_file_directories'].length == 1
        @data_path = config['data_file_directories'][0]
      else
        # TODO : manage multiple data directories
        raise('This backup tool does not currently work with multiple data directories')
      end
    else
      raise('No data directory defined in config file')
    end
  rescue Exception => e
    raise("Could not parse Cassandra config file #{config_file} (#{e.message})")
  end

  private :read_config_file

  def nodetool_snapshot(name)
    @logger.debug("Starting a new Cassandra snapshot #{name}")
    begin
      success = system('nodetool', 'snapshot', '-t', name)
      if success
        @logger.debug('Cassandra Snapshot successful')
      else
        raise
      end
    rescue Exception => e
      raise("Error while snapshot command (#{e.message})")
    end
  end

  private :nodetool_snapshot

  def nodetool_clearsnapshot(name)
    @logger.debug("Deleting snapshot #{name} in Cassandra")
    begin
      success = system('nodetool', 'clearsnapshot', '-t', name)
      if success
        @logger.debug('Cassandra Snapshot deletion successful')
      else
        raise
      end
    rescue Exception => e
      raise("Error while clearsnapshot command (#{e.message})")
    end
  end

  private :nodetool_clearsnapshot

  def get_keyspaces_and_tables
    result = {}
    Dir.foreach(@data_path) do |keyspace|
      next if keyspace == '.' || keyspace == '..'
      result[keyspace] = []
      Dir.foreach(@data_path + '/' + keyspace) do |table|
        next if table == '.' || table == '..'
        result[keyspace].push(table)
      end
    end
    result
  end

  private :get_keyspaces_and_tables

  def build_metadata(name)
    result = Set.new
    ks = get_keyspaces_and_tables
    ks.each do |keyspace, tables|
      tables.each do |table|
        snapdir = @data_path + '/' + keyspace + '/' + table + '/snapshots/' + name
        next unless Dir.exist?(snapdir)
        Dir.foreach(snapdir) do |filename|
          next if filename == '.' || filename == '..'
          result.add(keyspace + '/' + table + '/snapshots/' + name + '/' + filename)
        end
      end
    end
    result
  end

  private :build_metadata

  def new_snapshot
    today = Time.new.strftime('%Y_%m_%d')
    snapname = 'cass_snap_' + today

    nodetool_snapshot(snapname)
    metadata = build_metadata(snapname)

    CassandraSnapshot.new(@cluster_name, @node_name, today, metadata)
  end

  def delete_snapshot(snapshot)
    snapname = 'cass_snap_' + snapshot.date
    nodetool_clearsnapshot(snapname)
  end
end

class CassandraSnapshot
  attr_reader :cluster, :node, :date, :metadata

  def initialize(cluster, node, date, metadata = nil)
    @cluster = cluster
    @node = node
    @date = date
    @metadata = if metadata.nil?
                  Set.new
                else
                  metadata
                end
  end

  def to_s
    "[#{@cluster}|#{@node}|#{@date}]"
  end

  def ==(other)
    @cluster == other.cluster && @node == other.node && @date == other.date
  end

  def <=>(other)
    c = @cluster <=> other.cluster
    n = @node <=> other.node
    d = @date <=> other.date
    c * 3 + n * 2 + d
  end
end
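read_config_file only consumes two keys of the standard cassandra.yaml and rejects configurations with more than one data directory. A minimal sketch of the relevant section, using the stock Cassandra defaults purely for illustration:

cluster_name: 'Test Cluster'
data_file_directories:
- /var/lib/cassandra/data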
data/lib/hadoop.rb
ADDED
@@ -0,0 +1,14 @@
require 'webhdfs'
require 'webhdfs/fileutils'

WebHDFS::ClientV1::REDIRECTED_OPERATIONS.delete('OPEN')

class Hadoop < WebHDFS::Client
  attr_reader :base_dir

  def initialize(host: 'localhost', port: 14_000, base_dir: '/')
    super(host, port)
    @kerberos = true
    @base_dir = base_dir
  end
end
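Hadoop is a thin subclass of WebHDFS::Client from the webhdfs gem; the list, read, create and delete calls made by BackupTool are inherited from that client. A minimal sketch of direct use, with a placeholder gateway hostname:

# Sketch only: assumes a WebHDFS/HttpFS gateway reachable at the placeholder host.
hdfs = Hadoop.new(host: 'hdfs-gateway.example.org', port: 14_000, base_dir: 'cassandra')

hdfs.create("#{hdfs.base_dir}/example.txt", 'hello', overwrite: true)  # upload a small file
puts hdfs.read("#{hdfs.base_dir}/example.txt")                         # read it back
hdfs.list(hdfs.base_dir).each { |e| puts e['pathSuffix'] }             # list the base directory
hdfs.delete("#{hdfs.base_dir}/example.txt")                            # clean up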
metadata
ADDED
@@ -0,0 +1,121 @@
--- !ruby/object:Gem::Specification
name: cassback
version: !ruby/object:Gem::Version
  version: 0.1.1
platform: ruby
authors:
- Vincent Van Hollebeke
- Bogdan Niculescu
autorequire:
bindir: bin
cert_chain: []
date: 2016-04-20 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.11'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.11'
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
- !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.8'
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.8.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.8'
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.8.0
- !ruby/object:Gem::Dependency
  name: table_print
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.5'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.5.6
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.5'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.5.6
description: This is a tool that allows creating backups of Cassandra and pushing
  them into HDFS.
email:
- v.vanhollebeke@criteo.com
- b.niculescu@criteo.com
executables:
- cassback.rb
extensions: []
extra_rdoc_files: []
files:
- bin/cassback.rb
- lib/backuptool.rb
- lib/cassandra.rb
- lib/hadoop.rb
homepage: http://rubygems.org/gems/cassback
licenses:
- Apache2
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.5.2
signing_key:
specification_version: 4
summary: Cassandra backup to HDFS.
test_files: []
has_rdoc: