cassback 0.2.4 → 0.2.5
- checksums.yaml +4 -4
- data/lib/backuptool.rb +115 -90
- data/lib/cassandra.rb +1 -1
- data/lib/cassback/version.rb +1 -1
- data/test/hadoop_stub.rb +4 -0
- metadata +2 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5922de1fa29428f24e257451d544e2e04e660d96
+  data.tar.gz: e40787d17308c083cf302b38c0153f169025aaff
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bbd25e253aa0ef1c31b12cc83199e4b3d45d60fa7debd68b91477221e58c8b30419250ebf28c4c52ec35207a554e05bbe53a305e7da1ee33d5194d83e1419c19
+  data.tar.gz: b76a37273cdae0d46a3c1d2f604fb9016513b0a01e28384702e9ad2d25497e8c88df27e41ab2c4f2aa0105288f91c9617b6eaa125d401c8cc50db3091da6eebc
data/lib/backuptool.rb CHANGED

@@ -22,59 +22,78 @@ class BackupTool
     @metadir = META_DIR
   end
 
+  def metadata_dir_for_backup(node, date)
+    return metadata_dir() + node + '/cass_snap_' + date + '/'
+  end
+
+  def metadata_dir_for_node(node)
+    return metadata_dir() + node + '/'
+  end
+
+  def metadata_dir()
+    return @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/'
+  end
+
+  def data_dir_for_node(node)
+    return @hadoop.base_dir + '/' + @cassandra.cluster_name + '/' + node + '/'
+  end
+
   # Look for snapshots
   # * *Args* :
   #   - +node+ -> Cassandra node name
   #   - +date+ -> HDFS instance
   def search_snapshots(node: 'ALL', date: 'ALL')
-    result = []
-
-    def get_snapshot_metadata(node, date)
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
-      return @hadoop.read(remote).split("\n").to_set
-    rescue Exception => e
-      raise("Could not read metadata : #{e.message}")
-    end
 
+    # Look for all snapshots already existing for "node" at time "date"
     def get_snapshots_node(node, date)
-
+      results = []
+      dates = [date]
       begin
         if date == 'ALL'
-
-
-
-          date = item['pathSuffix'].gsub('cass_snap_', '')
-          metadata = get_snapshot_metadata(node, date)
-          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-          result.push(snapshot)
-        end
-      else
-        metadata = get_snapshot_metadata(node, date)
-        snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-        result.push(snapshot)
+          dates = @hadoop.list(metadata_dir_for_node(node))
+                         .select { |dir| dir['pathSuffix'].include? 'cass_snap_' }
+                         .map { |dir| dir['pathSuffix'].gsub('cass_snap_', '')}
         end
+
+        dates.each do |date|
+          metadata = @hadoop.read(metadata_dir_for_backup(node, date)).split("\n").to_set
+          results.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
+        end
+
       rescue Exception => e
        @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
       end
-
+
+      return results
     end
 
-
+    # Get the list of nodes
+    def get_node_list(node)
+      if node != 'ALL'
+        return [node]
+      end
+
+      nodes = []
       begin
-
-
-
-
-          result += get_snapshots_node(n, date)
-        end
+        nodes = @hadoop.list(metadata_dir())
+                       .select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+                       .map { |item| item['pathSuffix'] }
+                       .flatten
       rescue Exception => e
-        @logger.warn("Could not get
+        @logger.warn("Could not get node list for cluster #{@cassandra.cluster_name} : #{e.message}")
       end
-
-
+
+      return nodes
     end
 
-
+
+    # RUN
+    @logger.info("Searching snapshots for #{node} at time #{date}")
+    snapshots = get_node_list(node).map { |node| get_snapshots_node(node, date) }
+                                   .flatten
+                                   .sort
+    @logger.info("Found #{snapshots.length} snapshots")
+    return snapshots
   end
 
   def list_snapshots(node: @cassandra.node_name)
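For orientation, here is a minimal sketch of the directory layout the new path helpers compose. The base directory, META_DIR value, cluster name, node name and date below are invented for illustration, not values taken from the gem:

# Hypothetical values; only the path composition mirrors the helpers above.
base_dir = '/backups'
metadir  = 'cass_snap_metadata'   # stands in for META_DIR, whose value is not shown in this diff
cluster  = 'cluster1'
node     = 'node1'
date     = '2017_01_23'

metadata_dir            = base_dir + '/' + metadir + '/' + cluster + '/'
metadata_dir_for_node   = metadata_dir + node + '/'
metadata_dir_for_backup = metadata_dir_for_node + 'cass_snap_' + date + '/'
data_dir_for_node       = base_dir + '/' + cluster + '/' + node + '/'

puts metadata_dir_for_backup  # /backups/cass_snap_metadata/cluster1/node1/cass_snap_2017_01_23/
puts data_dir_for_node        # /backups/cluster1/node1/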
@@ -83,31 +102,40 @@ class BackupTool
     tp(snapshots, 'cluster', 'node', 'date')
   end
 
-  def
-    @logger.info('
-    snapshot = @cassandra.new_snapshot
-
+  def prepare_hdfs_dirs(node)
+    @logger.info(':::::::: Prepare HDFS ::::::::')
     begin
-
-
-
+      paths = [data_dir_for_node(node), metadata_dir_for_node(node)]
+      paths.each do |path|
+        @logger.info("Creating destination directory " + path)
+        if not @hadoop.mkdir(path)
+          raise
+        end
       end
     rescue Exception => e
       raise("Could not create your cluster directory : #{e.message}")
     end
+  end
 
+  def new_snapshot
+    @logger.info(':::::::: Creating new snapshot ::::::::')
+    snapshot = @cassandra.new_snapshot
+
+    prepare_hdfs_dirs(snapshot.node)
+
+    @logger.info(':::::::: Get last backup ::::::::')
     existing = search_snapshots(node: snapshot.node)
     last = if existing.empty?
-
-           else
-
-           end
-
-    @logger.info('Uploading tables to Hadoop')
+             then CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
+           else existing[-1] end
+    @logger.info("Last snapshot is #{last}")
     files = snapshot.metadata - last.metadata
     @logger.info("#{files.length} files to upload")
+
+
+    @logger.info('::::::: Uploading tables to HDFS ::::::')
     index = 0
-    number_of_files = files.
+    number_of_files = files.length
     total_file_size = 0
     files.each do |file|
       index += 1
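The upload step above is incremental: plain Set arithmetic keeps only files that appear in the new snapshot's metadata but not in the previous one. A self-contained sketch with invented file names:

require 'set'

# One entry per SSTable file, as stored in a snapshot's metadata.
last_metadata = Set.new(['ks1/t1/mc-1-big-Data.db', 'ks1/t1/mc-2-big-Data.db'])
new_metadata  = Set.new(['ks1/t1/mc-1-big-Data.db', 'ks1/t1/mc-2-big-Data.db',
                         'ks1/t1/mc-3-big-Data.db'])

# Only the delta needs to be uploaded; unchanged SSTables are already on HDFS.
files_to_upload = new_metadata - last_metadata
puts files_to_upload.to_a.inspect  # ["ks1/t1/mc-3-big-Data.db"]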
@@ -115,26 +143,27 @@ class BackupTool
       local_file_size = File.size(local)
       total_file_size += local_file_size
       pretty_size = Filesize.from("#{local_file_size} B").pretty
-      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to
-      remote =
+      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to HDFS")
+      remote = data_dir_for_node(snapshot.node) + file
       @logger.debug("#{local} => #{remote}")
-
-
-
-
-
-
-
-
+      File.open(local, 'r') do |f|
+        begin
+          retries = 3
+          @hadoop.create(remote, f, overwrite: true)
+        rescue Exception => e
+          @logger.info("HDFS write failed: #{e.message}")
+          @logger.info("HDFS write retrying in 1s")
+          sleep 1
+          retry if (retries -= 1) < 0
+        end
       end
-      f.close
     end
 
     total_file_size_pretty = Filesize.from("#{total_file_size} B").pretty
     @logger.info("Total size of uploaded files is #{total_file_size_pretty}")
 
-    @logger.info('Sending metadata to
-    remote =
+    @logger.info('Sending metadata to HDFS')
+    remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
     @logger.debug("metadata => #{remote}")
     @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)
 
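The per-file upload now retries failed HDFS writes. As a general pattern, a bounded retry around a remote write looks roughly like the sketch below; the hadoop client object, the upload_with_retry name and the rewind-before-retry detail are illustrative assumptions, not the gem's exact code:

# Sketch of a bounded retry; `hadoop` is any object responding to
# create(path, io, overwrite:) -- a stand-in for the real HDFS client.
def upload_with_retry(hadoop, remote, local, attempts: 3)
  File.open(local, 'r') do |f|
    begin
      hadoop.create(remote, f, overwrite: true)
    rescue StandardError => e
      attempts -= 1
      raise if attempts <= 0
      puts "HDFS write failed (#{e.message}), retrying in 1s"
      sleep 1
      f.rewind   # restart the stream before sending it again
      retry
    end
  end
end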
@@ -146,27 +175,27 @@ class BackupTool
     snapshots = search_snapshots(node: node, date: date)
     if snapshots.empty?
       raise('No snapshot found for deletion')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        @hadoop.delete(remote)
-      end
-      @logger.info('Deleting metadata in Hadoop')
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    end
+
+    snapshots.each do |snapshot|
+      @logger.info("Deleting snapshot #{snapshot}")
+      node_snapshots = search_snapshots(node: snapshot.node)
+      merged_metadata = Set.new
+      node_snapshots.each do |s|
+        merged_metadata += s.metadata if s != snapshot
+      end
+      files = snapshot.metadata - merged_metadata
+      @logger.info("#{files.length} files to delete")
+      files.each do |file|
+        @logger.info("Deleting file #{file}")
+        remote = data_dir_for_node(snapshot.node) + '/' + file
         @logger.debug("DELETE => #{remote}")
         @hadoop.delete(remote)
       end
+      @logger.info('Deleting metadata in HDFS')
+      remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
+      @logger.debug("DELETE => #{remote}")
+      @hadoop.delete(remote)
     end
   end
 
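Because consecutive snapshots share unchanged SSTables, the rewritten delete only removes files that no other snapshot of the same node still references. The set arithmetic in isolation, with invented file names:

require 'set'

snapshot_to_delete = Set.new(['mc-1-big-Data.db', 'mc-2-big-Data.db'])
other_snapshots    = [Set.new(['mc-1-big-Data.db']),
                      Set.new(['mc-1-big-Data.db', 'mc-4-big-Data.db'])]

# Everything still referenced by the snapshots we keep.
merged_metadata = other_snapshots.reduce(Set.new, :+)

# Only files no remaining snapshot points at are deleted from HDFS.
files_to_delete = snapshot_to_delete - merged_metadata
puts files_to_delete.to_a.inspect  # ["mc-2-big-Data.db"]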
@@ -177,7 +206,7 @@ class BackupTool
     @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
 
     all_snapshots = search_snapshots
-    @logger.info("A total of #{all_snapshots.size} snapshots were found on
+    @logger.info("A total of #{all_snapshots.size} snapshots were found on HDFS.")
 
     snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
     @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
@@ -187,14 +216,13 @@ class BackupTool
     end
 
     all_backup_flags = get_backup_flags
-    @logger.info("A total of #{all_backup_flags.size} back up flags were found on
+    @logger.info("A total of #{all_backup_flags.size} back up flags were found on HDFS.")
 
     backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
     @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")
 
-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
     backup_flags_to_be_delete.each do |flag|
-      file =
+      file = metadata_dir() + flag.file
       @logger.info("Deleting #{file}")
       @hadoop.delete(file)
     end
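The retention logic compares each backup flag's date against the cut-off and deletes only the older ones; the flag path is now derived from metadata_dir() instead of being rebuilt by hand. A sketch of the selection step, using a Struct as a stand-in for the gem's BackupFlag class:

require 'date'

# Stand-in for BackupFlag; only the fields the selection uses matter here.
Flag = Struct.new(:file, :date)

flags = [
  Flag.new('BACKUP_COMPLETED_2017_01_10', Date.new(2017, 1, 10)),
  Flag.new('BACKUP_COMPLETED_2017_01_22', Date.new(2017, 1, 22)),
]
retention_date = Date.new(2017, 1, 15)

to_delete = flags.select { |flag| flag.date < retention_date }
puts to_delete.map(&:file).inspect  # ["BACKUP_COMPLETED_2017_01_10"]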
@@ -204,19 +232,16 @@ class BackupTool
   # This is an individual command that has to be called manually after snapshots have finished
   def create_backup_flag(date)
     file_name = 'BACKUP_COMPLETED_' + date
-    remote_file =
+    remote_file = metadata_dir() + file_name
 
     @logger.info('Setting backup completed flag : ' + remote_file)
     @hadoop.create(remote_file, '', overwrite: true)
   end
 
   def get_backup_flags
-
-
-
-    backup_flags.collect do |file|
-      BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
-    end
+    @hadoop.list(metadata_dir())
+           .select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+           .collect { |file| BackupFlag.new(@cassandra.cluster_name, file['pathSuffix']) }
   end
 
   # Download a file from HDFS, buffered way
@@ -279,7 +304,7 @@ class BackupTool
     files_to_be_restored.each do |file|
       @logger.info("Restoring file #{file}")
       local = destination + '/' + file
-      remote =
+      remote = data_dir_for_node(snapshot.node) + file
       # Download the file from hdfs
       buffered_download(remote, local)
     end
data/lib/cassandra.rb CHANGED

@@ -89,7 +89,7 @@ class Cassandra
   def get_keyspaces_and_tables
     result = {}
     Dir.foreach(@data_path) do |keyspace|
-      next if keyspace == '.' || keyspace == '..'
+      next if keyspace == '.' || keyspace == '..' || !Dir.exist?(@data_path + '/' + keyspace)
       result[keyspace] = []
       Dir.foreach(@data_path + '/' + keyspace) do |table|
         next if table == '.' || table == '..'
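The added Dir.exist? guard makes keyspace discovery skip plain files that can sit alongside keyspace directories in the Cassandra data path. A runnable sketch of the effect on a throwaway directory:

require 'fileutils'
require 'tmpdir'

Dir.mktmpdir do |data_path|
  FileUtils.mkdir_p(File.join(data_path, 'system'))    # a keyspace directory
  FileUtils.touch(File.join(data_path, 'stray.log'))   # a stray regular file

  keyspaces = Dir.foreach(data_path).select do |entry|
    entry != '.' && entry != '..' && Dir.exist?(File.join(data_path, entry))
  end
  puts keyspaces.inspect  # ["system"]
end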
data/lib/cassback/version.rb CHANGED

data/test/hadoop_stub.rb CHANGED
metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cassback
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-01-
+date: 2017-01-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler