cassback 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/cassback.rb +168 -0
- data/lib/backuptool.rb +201 -0
- data/lib/cassandra.rb +153 -0
- data/lib/hadoop.rb +14 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 619f294e60950a9f4122ebd6879c7e51a0e524d2
  data.tar.gz: f1e827c821dd8301ce03276ee5e87ca2c11e8092
SHA512:
  metadata.gz: 6ef3cbf11aeca4fb97dde85241807db0c159b475c09c7cd41f75464484dbe22566590df0e4f48a540096607b14fe8a2fe8dca269b43629144f6b5bf8c8130a5b
  data.tar.gz: 29ece32e3a289a3240822c9b27f3e7bd0d7fda87cf91838221343448d2056ff872f8ab056169624dde35261d153fb30aee0327f02746febd1bf06cd0e3fd3154
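These are the standard RubyGems checksums: SHA1 and SHA512 digests of the gem's metadata.gz and data.tar.gz members. A minimal verification sketch in Ruby, assuming the two members have been extracted from the .gem archive into the current directory:

require 'digest'

expected = '29ece32e3a289a3240822c9b27f3e7bd0d7fda87cf91838221343448d2056ff8' \
           '72f8ab056169624dde35261d153fb30aee0327f02746febd1bf06cd0e3fd3154'
actual = Digest::SHA512.file('data.tar.gz').hexdigest
puts(actual == expected ? 'checksum OK' : 'checksum MISMATCH')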
data/bin/cassback.rb
ADDED
@@ -0,0 +1,168 @@
#!/usr/bin/env ruby
require 'logger'
require 'optparse'
require 'yaml'

require_relative '../lib/hadoop.rb'
require_relative '../lib/cassandra.rb'
require_relative '../lib/backuptool.rb'

# Allows deep merging of hashes that may themselves contain hashes.
class ::Hash
  def deep_merge!(second)
    merger = proc { |_key, v1, v2| Hash === v1 && Hash === v2 ? v1.merge(v2, &merger) : Array === v1 && Array === v2 ? v1 | v2 : [:undefined, nil, :nil].include?(v2) ? v1 : v2 }
    merge!(second.to_h, &merger)
  end
end
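
# A quick illustration of deep_merge! (values hypothetical):
#   defaults  = { 'hadoop' => { 'hostname' => 'localhost', 'port' => 14_000 } }
#   overrides = { 'hadoop' => { 'hostname' => 'namenode.example.org' } }
#   defaults.deep_merge!(overrides)
#   defaults  # => { 'hadoop' => { 'hostname' => 'namenode.example.org', 'port' => 14000 } }
# Nested hashes merge key by key, arrays are unioned, and nil/:undefined on the
# right-hand side leaves the existing value in place.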

# Create a Ruby logger with time/size rotation that logs both to file and console.
two_mb = 2 * 1024 * 1024
logger = Logger.new('| tee cassback.log', 'weekly', two_mb)

# Default action
action = nil

# Default config file
config_file = ''

# Command line overrides, deep-merged over the config file after parsing.
command_line_config = {
  'cassandra' => {},
  'hadoop' => {},
}

# Default options
options = {
  'cassandra' => {
    'config' => '/etc/cassandra/conf/cassandra.yaml',
  },
  'hadoop' => {
    'hostname' => 'localhost',
    'port' => 14_000,
    'directory' => 'cassandra',
  },
  'restore' => {
    'destination' => 'cassandra',
  },
}

# If no argument is given on the command line, print the help.
ARGV << '-h' if ARGV.empty?

# Parse command line options
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: cassback.rb [options]'

  opts.separator ''
  opts.separator 'Configuration:'
  opts.on('-C', '--config CONFIGFILE', 'Configuration file for the application') do |v|
    config_file = v
  end

  opts.separator ''
  opts.separator 'Actions:'
  opts.on('-S', '--snapshot', 'creates a new snapshot and sends it to Hadoop') do |_v|
    action = 'new'
  end
  opts.on('-R', '--restore', 'restores a snapshot from Hadoop, needs a date and a destination') do |_v|
    action = 'restore'
  end
  opts.on('-L', '--list', 'lists snapshots on Hadoop') do |_v|
    action = 'list'
  end
  opts.on('-F', '--flush', 'removes a backed-up snapshot from Hadoop, needs a date') do |_v|
    action = 'delete'
  end

  opts.separator ''
  opts.separator 'Action related:'
  opts.on('-n', '--node NODE', 'Cassandra server node (default is current host)') do |v|
    options['node'] = v
  end
  opts.on('-d', '--date DATE', 'snapshot date, like YYYY_MM_DD') do |v|
    options['date'] = v
  end
  opts.on('-t', '--destination DIR', 'local destination path for restore (default is cassandra)') do |v|
    options['restore']['destination'] = v
  end

  opts.separator ''
  opts.separator 'Hadoop (WebHDFS):'
  opts.on('-H', '--host HOSTNAME', 'Hostname (default is localhost)') do |v|
    command_line_config['hadoop']['hostname'] = v
  end
  opts.on('-P', '--port PORT', 'Port (default is 14000)') do |v|
    command_line_config['hadoop']['port'] = v
  end
  opts.on('-D', '--directory DIRECTORY', 'Directory where to store backups (default is cassandra)') do |v|
    command_line_config['hadoop']['directory'] = v
  end

  opts.separator ''
  opts.separator 'Cassandra:'
  opts.on('-c', '--cassandra CONFIGFILE', 'Cassandra configuration file (default is /etc/cassandra/conf/cassandra.yaml)') do |v|
    command_line_config['cassandra']['config'] = v
  end

  opts.separator ''
  opts.separator 'Help:'
  opts.on('-h', '--help', 'Displays Help') do
    puts opts
    exit
  end
end
parser.parse!

# Read the configuration file if it exists.
begin
  options.deep_merge!(YAML.load_file(config_file))
  logger.info("Using configuration file #{config_file}")
rescue
  logger.warn('Unable to read configuration file, continuing with default settings')
ensure
  # Merge with command line settings.
  options.deep_merge!(command_line_config)
end

# Fail if no action specified
if action.nil?
  logger.error('No action given')
  exit(1)
end

begin
  # Create the Hadoop object
  hadoop = Hadoop.new(host: options['hadoop']['hostname'], port: options['hadoop']['port'], base_dir: options['hadoop']['directory'])

  # Create the Cassandra object
  cassandra = Cassandra.new(options['cassandra']['config'], logger)

  # Create the backup object
  bck = BackupTool.new(cassandra, hadoop, logger)

  # If no node specified, use the local node
  options['node'] = cassandra.node_name unless options.include? 'node'

  # New snapshot
  if action == 'new'
    bck.new_snapshot

  # Restore a snapshot
  elsif action == 'restore'
    raise('No date given') unless options.include? 'date'
    bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'])

  # List snapshots
  elsif action == 'list'
    bck.list_snapshots(node: options['node'])

  # Delete a snapshot
  elsif action == 'delete'
    raise('No date given') unless options.include? 'date'
    bck.delete_snapshots(node: options['node'], date: options['date'])
  end

# In case of failure
rescue Exception => e
  logger.error(e.message)
  exit(1)
end

exit(0)
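Option precedence in the script is: built-in defaults, then the file given via -C, then command-line flags, each layer deep-merged over the previous. A minimal sketch of that layering, reusing the Hash#deep_merge! patch above (hostname and port values hypothetical):

require 'yaml'

options = { 'hadoop' => { 'hostname' => 'localhost', 'port' => 14_000 } }

file_config = YAML.load(<<~CONF)  # stand-in for YAML.load_file(config_file)
  hadoop:
    hostname: namenode.example.org
CONF
command_line_config = { 'hadoop' => { 'port' => 50_070 } }

options.deep_merge!(file_config)
options.deep_merge!(command_line_config)
# options['hadoop'] => { 'hostname' => 'namenode.example.org', 'port' => 50070 }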
data/lib/backuptool.rb
ADDED
@@ -0,0 +1,201 @@
require 'fileutils'
require 'set'
require 'table_print'

# Buffer size, used for downloads
BUFFER_SIZE = 10_000_000

# Directory where metadata is stored
META_DIR = 'cass_snap_metadata'.freeze

class BackupTool
  # Create a new BackupTool instance
  # * *Args* :
  #   - +cassandra+ -> Cassandra instance
  #   - +hadoop+ -> HDFS instance
  #   - +logger+ -> Logger
  def initialize(cassandra, hadoop, logger)
    @cassandra = cassandra
    @hadoop = hadoop
    @logger = logger

    @metadir = META_DIR
  end

  # Look for snapshots
  # * *Args* :
  #   - +node+ -> Cassandra node name
  #   - +date+ -> snapshot date
  def search_snapshots(node: 'ALL', date: 'ALL')
    result = []

    def get_snapshot_metadata(node, date)
      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
      return @hadoop.read(remote).split("\n").to_set
    rescue Exception => e
      raise("Could not read metadata : #{e.message}")
    end

    def get_snapshots_node(node, date)
      result = []
      begin
        if date == 'ALL'
          ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
          ls.each do |item|
            date = item['pathSuffix'].gsub('cass_snap_', '')
            metadata = get_snapshot_metadata(node, date)
            snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
            result.push(snapshot)
          end
        else
          metadata = get_snapshot_metadata(node, date)
          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
          result.push(snapshot)
        end
      rescue Exception => e
        @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
      end
      result
    end

    if node == 'ALL'
      begin
        ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
        ls.each do |item|
          n = item['pathSuffix']
          result += get_snapshots_node(n, date)
        end
      rescue Exception => e
        @logger.warn("Could not get snapshots for cluster #{@cassandra.cluster_name} : #{e.message}")
      end
    else
      result = get_snapshots_node(node, date)
    end

    result.sort
  end

  def list_snapshots(node: @cassandra.node_name)
    @logger.info('Listing available snapshots')
    snapshots = search_snapshots(node: node)
    tp(snapshots, 'cluster', 'node', 'date')
  end

  def new_snapshot
    @logger.info('Starting a new snapshot')
    snapshot = @cassandra.new_snapshot

    existing = search_snapshots(node: snapshot.node)
    last = if existing.empty?
             CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
           else
             existing[-1]
           end

    @logger.info('Uploading tables to Hadoop')
    files = snapshot.metadata - last.metadata
    @logger.info("#{files.length} files to upload")
    files.each do |file|
      @logger.info("Sending file #{file} to Hadoop")
      local = @cassandra.data_path + '/' + file
      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
      @logger.debug("#{local} => #{remote}")
      f = File.open(local, 'r')
      @hadoop.create(remote, f, overwrite: true)
      f.close
    end

    @logger.info('Sending metadata to Hadoop')
    remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
    @logger.debug("metadata => #{remote}")
    @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)

    @cassandra.delete_snapshot(snapshot)
    @logger.info('Success !')
  end

  def delete_snapshots(node: @cassandra.node_name, date: 'ALL')
    snapshots = search_snapshots(node: node, date: date)
    if snapshots.empty?
      raise('No snapshot found for deletion')
    else
      snapshots.each do |snapshot|
        @logger.info("Deleting snapshot #{snapshot}")
        node_snapshots = search_snapshots(node: snapshot.node)
        merged_metadata = Set.new
        node_snapshots.each do |s|
          merged_metadata += s.metadata if s != snapshot
        end
        files = snapshot.metadata - merged_metadata
        @logger.info("#{files.length} files to delete")
        files.each do |file|
          @logger.info("Deleting file #{file}")
          remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
          @logger.debug("DELETE => #{remote}")
          @hadoop.delete(remote)
        end
        @logger.info('Deleting metadata in Hadoop')
        remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
        @logger.debug("DELETE => #{remote}")
        @hadoop.delete(remote)
      end
    end
  end

  # Download a file from HDFS, in a buffered way
  # * *Args* :
  #   - +remote+ -> HDFS path
  #   - +local+ -> local path
  def buffered_download(remote, local)
    @logger.debug("#{remote} => #{local}")

    # Create the destination directory if it does not exist.
    path = File.dirname(local)
    FileUtils.mkdir_p(path) unless File.exist?(path)

    file = File.open(local, 'wb')

    offset = 0
    length = BUFFER_SIZE
    print '['
    while length == BUFFER_SIZE
      print '#'
      content = @hadoop.read(remote, offset: offset, length: BUFFER_SIZE)
      file.write(content)
      length = content.length
      offset += length
    end
    print "]\n"

    file.close
  end
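
  # The loop above keeps requesting full BUFFER_SIZE chunks and stops at the
  # first short one; if the file length is an exact multiple of BUFFER_SIZE,
  # a final zero-length read terminates it. The same pattern over local IO
  # (paths hypothetical):
  #   src = File.open('source.bin', 'rb')
  #   dst = File.open('copy.bin', 'wb')
  #   length = BUFFER_SIZE
  #   while length == BUFFER_SIZE
  #     chunk = src.read(BUFFER_SIZE) || ''  # read returns nil at EOF
  #     dst.write(chunk)
  #     length = chunk.length
  #   end
  #   src.close
  #   dst.close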

  # Restore a snapshot from HDFS
  # * *Args* :
  #   - +node+ -> node where the snapshot comes from
  #   - +date+ -> snapshot date
  #   - +destination+ -> local directory where to restore
  def restore_snapshot(node, date, destination)
    # Search for the snapshot matching node and date.
    snapshots = search_snapshots(node: node, date: date)

    if snapshots.empty?
      raise('No snapshot found for restore')
    elsif snapshots.length > 1
      raise('More than one candidate snapshot to restore')
    else
      snapshot = snapshots[0]
      @logger.info("Restoring snapshot #{snapshot}")
      @logger.info("#{snapshot.metadata.length} files to restore")

      # For each file in the metadata
      snapshot.metadata.each do |file|
        @logger.info("Restoring file #{file}")
        local = destination + '/' + file
        remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
        # Download the file from HDFS.
        buffered_download(remote, local)
      end
      @logger.info('Success !')
    end
  end
end
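new_snapshot uploads only the set difference between the new snapshot's metadata and the previous one's, and delete_snapshots removes only files referenced by no other snapshot; both reduce to Set arithmetic. A sketch with hypothetical file lists:

require 'set'

last    = Set['ks1/t1/a-Data.db', 'ks1/t1/b-Data.db']
current = Set['ks1/t1/a-Data.db', 'ks1/t1/b-Data.db', 'ks1/t1/c-Data.db']

to_upload = current - last
to_upload.to_a  # => ["ks1/t1/c-Data.db"]

# For deletion: anything in the doomed snapshot still referenced elsewhere survives.
others = Set['ks1/t1/a-Data.db']
to_delete = last - others
to_delete.to_a  # => ["ks1/t1/b-Data.db"]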
data/lib/cassandra.rb
ADDED
@@ -0,0 +1,153 @@
require 'set'
require 'socket'
require 'yaml'

class Cassandra
  attr_reader :data_path, :cluster_name, :node_name

  def initialize(config_file, logger)
    @logger = logger

    read_config_file(config_file)

    @node_name = Socket.gethostname

    @logger.info("Cassandra cluster name = #{@cluster_name}")
    @logger.info("Cassandra node name = #{@node_name}")
    @logger.info("Cassandra data path = #{@data_path}")
  end

  def read_config_file(config_file)
    config = YAML.load_file(config_file)
    if config.include? 'cluster_name'
      @cluster_name = config['cluster_name'].tr(' ', '_')
    else
      @logger.warn("Could not find cluster name in Cassandra config file #{config_file}")
      @cluster_name = 'noname_cassandra_cluster'
    end
    if config.include? 'data_file_directories'
      if config['data_file_directories'].length == 1
        @data_path = config['data_file_directories'][0]
      else
        # TODO: manage multiple data directories
        raise('This backup tool does not currently work with multiple data directories')
      end
    else
      raise('No data directory defined in config file')
    end
  rescue Exception => e
    raise("Could not parse Cassandra config file #{config_file} (#{e.message})")
  end

  private :read_config_file

  def nodetool_snapshot(name)
    @logger.debug("Starting a new Cassandra snapshot #{name}")
    begin
      success = system('nodetool', 'snapshot', '-t', name)
      if success
        @logger.debug('Cassandra snapshot successful')
      else
        raise
      end
    rescue Exception => e
      raise("Error while running snapshot command (#{e.message})")
    end
  end

  private :nodetool_snapshot

  def nodetool_clearsnapshot(name)
    @logger.debug("Deleting snapshot #{name} in Cassandra")
    begin
      success = system('nodetool', 'clearsnapshot', '-t', name)
      if success
        @logger.debug('Cassandra snapshot deletion successful')
      else
        raise
      end
    rescue Exception => e
      raise("Error while running clearsnapshot command (#{e.message})")
    end
  end

  private :nodetool_clearsnapshot

  def get_keyspaces_and_tables
    result = {}
    Dir.foreach(@data_path) do |keyspace|
      next if keyspace == '.' || keyspace == '..'
      result[keyspace] = []
      Dir.foreach(@data_path + '/' + keyspace) do |table|
        next if table == '.' || table == '..'
        result[keyspace].push(table)
      end
    end
    result
  end

  private :get_keyspaces_and_tables

  def build_metadata(name)
    result = Set.new
    ks = get_keyspaces_and_tables
    ks.each do |keyspace, tables|
      tables.each do |table|
        snapdir = @data_path + '/' + keyspace + '/' + table + '/snapshots/' + name
        next unless Dir.exist?(snapdir)
        Dir.foreach(snapdir) do |filename|
          next if filename == '.' || filename == '..'
          result.add(keyspace + '/' + table + '/snapshots/' + name + '/' + filename)
        end
      end
    end
    result
  end

  private :build_metadata

  def new_snapshot
    today = Time.new.strftime('%Y_%m_%d')
    snapname = 'cass_snap_' + today

    nodetool_snapshot(snapname)
    metadata = build_metadata(snapname)

    CassandraSnapshot.new(@cluster_name, @node_name, today, metadata)
  end

  def delete_snapshot(snapshot)
    snapname = 'cass_snap_' + snapshot.date
    nodetool_clearsnapshot(snapname)
  end
end

class CassandraSnapshot
  attr_reader :cluster, :node, :date, :metadata

  def initialize(cluster, node, date, metadata = nil)
    @cluster = cluster
    @node = node
    @date = date
    @metadata = if metadata.nil?
                  Set.new
                else
                  metadata
                end
  end

  def to_s
    "[#{@cluster}|#{@node}|#{@date}]"
  end

  def ==(other)
    @cluster == other.cluster && @node == other.node && @date == other.date
  end

  # Lexicographic comparison: cluster first, then node, then date.
  def <=>(other)
    [@cluster, @node, @date] <=> [other.cluster, other.node, other.date]
  end
end
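CassandraSnapshot orders lexicographically by cluster, node, then date; because dates are formatted YYYY_MM_DD, string order matches chronological order, which is why BackupTool can take existing[-1] as the most recent snapshot. A small sketch (names and dates hypothetical):

snaps = [
  CassandraSnapshot.new('prod', 'cass01', '2016_04_20'),
  CassandraSnapshot.new('prod', 'cass01', '2016_04_18'),
]
snaps.sort.map(&:to_s)
# => ["[prod|cass01|2016_04_18]", "[prod|cass01|2016_04_20]"]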
data/lib/hadoop.rb
ADDED
@@ -0,0 +1,14 @@
require 'webhdfs'
require 'webhdfs/fileutils'

WebHDFS::ClientV1::REDIRECTED_OPERATIONS.delete('OPEN')

class Hadoop < WebHDFS::Client
  attr_reader :base_dir

  def initialize(host: 'localhost', port: 14_000, base_dir: '/')
    super(host, port)
    @kerberos = true
    @base_dir = base_dir
  end
end
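The wrapper preconfigures a Kerberos-enabled WebHDFS client and remembers a base directory; deleting OPEN from REDIRECTED_OPERATIONS presumably suits an HttpFS gateway (default port 14000), which serves file reads itself rather than redirecting the client to a datanode. A usage sketch (hostname hypothetical):

hdfs = Hadoop.new(host: 'httpfs.example.org', port: 14_000, base_dir: '/backups/cassandra')
hdfs.base_dir  # => "/backups/cassandra"
hdfs.list('/backups/cassandra')  # standard WebHDFS::Client call, returns FileStatus hashes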
metadata
ADDED
@@ -0,0 +1,121 @@
--- !ruby/object:Gem::Specification
name: cassback
version: !ruby/object:Gem::Version
  version: 0.1.1
platform: ruby
authors:
- Vincent Van Hollebeke
- Bogdan Niculescu
autorequire:
bindir: bin
cert_chain: []
date: 2016-04-20 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.11'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.11'
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
- !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.8'
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.8.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.8'
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.8.0
- !ruby/object:Gem::Dependency
  name: table_print
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.5'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.5.6
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.5'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.5.6
description: This is a tool that allows creating backups of Cassandra and pushing
  them into HDFS.
email:
- v.vanhollebeke@criteo.com
- b.niculescu@criteo.com
executables:
- cassback.rb
extensions: []
extra_rdoc_files: []
files:
- bin/cassback.rb
- lib/backuptool.rb
- lib/cassandra.rb
- lib/hadoop.rb
homepage: http://rubygems.org/gems/cassback
licenses:
- Apache2
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.5.2
signing_key:
specification_version: 4
summary: Cassandra backup to HDFS.
test_files: []
has_rdoc: