cassback 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/backuptool.rb +115 -90
- data/lib/cassandra.rb +1 -1
- data/lib/cassback/version.rb +1 -1
- data/test/hadoop_stub.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5922de1fa29428f24e257451d544e2e04e660d96
+  data.tar.gz: e40787d17308c083cf302b38c0153f169025aaff
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bbd25e253aa0ef1c31b12cc83199e4b3d45d60fa7debd68b91477221e58c8b30419250ebf28c4c52ec35207a554e05bbe53a305e7da1ee33d5194d83e1419c19
+  data.tar.gz: b76a37273cdae0d46a3c1d2f604fb9016513b0a01e28384702e9ad2d25497e8c88df27e41ab2c4f2aa0105288f91c9617b6eaa125d401c8cc50db3091da6eebc
data/lib/backuptool.rb
CHANGED
@@ -22,59 +22,78 @@ class BackupTool
     @metadir = META_DIR
   end

+  def metadata_dir_for_backup(node, date)
+    return metadata_dir() + node + '/cass_snap_' + date + '/'
+  end
+
+  def metadata_dir_for_node(node)
+    return metadata_dir() + node + '/'
+  end
+
+  def metadata_dir()
+    return @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/'
+  end
+
+  def data_dir_for_node(node)
+    return @hadoop.base_dir + '/' + @cassandra.cluster_name + '/' + node + '/'
+  end
+
   # Look for snapshots
   # * *Args* :
   #   - +node+ -> Cassandra node name
   #   - +date+ -> HDFS instance
   def search_snapshots(node: 'ALL', date: 'ALL')
-    result = []
-
-    def get_snapshot_metadata(node, date)
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
-      return @hadoop.read(remote).split("\n").to_set
-    rescue Exception => e
-      raise("Could not read metadata : #{e.message}")
-    end

+    # Look for all snapshots already existing for "node" at time "date"
     def get_snapshots_node(node, date)
-
+      results = []
+      dates = [date]
       begin
         if date == 'ALL'
-
-
-
-            date = item['pathSuffix'].gsub('cass_snap_', '')
-            metadata = get_snapshot_metadata(node, date)
-            snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-            result.push(snapshot)
-          end
-        else
-          metadata = get_snapshot_metadata(node, date)
-          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-          result.push(snapshot)
+          dates = @hadoop.list(metadata_dir_for_node(node))
+                         .select { |dir| dir['pathSuffix'].include? 'cass_snap_' }
+                         .map { |dir| dir['pathSuffix'].gsub('cass_snap_', '')}
         end
+
+        dates.each do |date|
+          metadata = @hadoop.read(metadata_dir_for_backup(node, date)).split("\n").to_set
+          results.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
+        end
+
       rescue Exception => e
         @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
       end
-
+
+      return results
     end

-
+
+    # Get the list of nodes
+    def get_node_list(node)
+      if node != 'ALL'
+        return [node]
+      end
+
+      nodes = []
       begin
-
-
-
-
-          result += get_snapshots_node(n, date)
-        end
+        nodes = @hadoop.list(metadata_dir())
+                       .select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+                       .map { |item| item['pathSuffix'] }
+                       .flatten
       rescue Exception => e
-        @logger.warn("Could not get
+        @logger.warn("Could not get node list for cluster #{@cassandra.cluster_name} : #{e.message}")
       end
-
-
+
+      return nodes
     end

-
+
+    # RUN
+    @logger.info("Searching snapshots for #{node} at time #{date}")
+    snapshots = get_node_list(node).map { |node| get_snapshots_node(node, date) }
+                                   .flatten
+                                   .sort
+    @logger.info("Found #{snapshots.length} snapshots")
+    return snapshots
   end

   def list_snapshots(node: @cassandra.node_name)
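The four helpers added at the top of this hunk centralize the HDFS path layout that the rest of the file previously rebuilt inline. A minimal sketch of the paths they produce, using hypothetical values for the base directory, metadata directory name, cluster, node and date (none of these literals come from the gem):

```ruby
# Illustrative only: re-derives the helper results with made-up values.
base_dir = '/backups'       # stands in for @hadoop.base_dir
metadir  = 'metadata'       # stands in for @metadir (META_DIR)
cluster  = 'test_cluster'
node     = 'node1'
date     = '2017_01_23'

metadata_dir            = "#{base_dir}/#{metadir}/#{cluster}/"
metadata_dir_for_node   = metadata_dir + node + '/'
metadata_dir_for_backup = metadata_dir_for_node + 'cass_snap_' + date + '/'
data_dir_for_node       = "#{base_dir}/#{cluster}/#{node}/"

puts metadata_dir_for_backup  # => /backups/metadata/test_cluster/node1/cass_snap_2017_01_23/
puts data_dir_for_node        # => /backups/test_cluster/node1/
```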
@@ -83,31 +102,40 @@ class BackupTool
     tp(snapshots, 'cluster', 'node', 'date')
   end

-  def
-    @logger.info('
-    snapshot = @cassandra.new_snapshot
-
+  def prepare_hdfs_dirs(node)
+    @logger.info(':::::::: Prepare HDFS ::::::::')
     begin
-
-
-
+      paths = [data_dir_for_node(node), metadata_dir_for_node(node)]
+      paths.each do |path|
+        @logger.info("Creating destination directory " + path)
+        if not @hadoop.mkdir(path)
+          raise
+        end
       end
     rescue Exception => e
       raise("Could not create your cluster directory : #{e.message}")
     end
+  end

+  def new_snapshot
+    @logger.info(':::::::: Creating new snapshot ::::::::')
+    snapshot = @cassandra.new_snapshot
+
+    prepare_hdfs_dirs(snapshot.node)
+
+    @logger.info(':::::::: Get last backup ::::::::')
     existing = search_snapshots(node: snapshot.node)
     last = if existing.empty?
-
-           else
-
-           end
-
-    @logger.info('Uploading tables to Hadoop')
+           then CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
+           else existing[-1] end
+    @logger.info("Last snapshot is #{last}")
     files = snapshot.metadata - last.metadata
     @logger.info("#{files.length} files to upload")
+
+
+    @logger.info('::::::: Uploading tables to HDFS ::::::')
     index = 0
-    number_of_files = files.
+    number_of_files = files.length
     total_file_size = 0
     files.each do |file|
       index += 1
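The rewritten `new_snapshot` uploads only what the most recent backup does not already hold: `files = snapshot.metadata - last.metadata` is a plain `Set` difference between the new snapshot's file list and the previous one's. A toy illustration with invented file names:

```ruby
require 'set'

# Hypothetical SSTable listings; in the gem these come from snapshot.metadata.
last_metadata    = Set.new(['ks1/t1/mc-1-big-Data.db', 'ks1/t1/mc-2-big-Data.db'])
current_metadata = Set.new(['ks1/t1/mc-1-big-Data.db', 'ks1/t1/mc-2-big-Data.db',
                            'ks1/t1/mc-3-big-Data.db'])

# Same operation as snapshot.metadata - last.metadata in the diff above.
files_to_upload = current_metadata - last_metadata
p files_to_upload.to_a  # => ["ks1/t1/mc-3-big-Data.db"]
```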
@@ -115,26 +143,27 @@ class BackupTool
       local_file_size = File.size(local)
       total_file_size += local_file_size
       pretty_size = Filesize.from("#{local_file_size} B").pretty
-      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to
-      remote =
+      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to HDFS")
+      remote = data_dir_for_node(snapshot.node) + file
       @logger.debug("#{local} => #{remote}")
-
-
-
-
-
-
-
-
+      File.open(local, 'r') do |f|
+        begin
+          retries = 3
+          @hadoop.create(remote, f, overwrite: true)
+        rescue Exception => e
+          @logger.info("HDFS write failed: #{e.message}")
+          @logger.info("HDFS write retrying in 1s")
+          sleep 1
+          retry if (retries -= 1) < 0
+        end
       end
-      f.close
     end

     total_file_size_pretty = Filesize.from("#{total_file_size} B").pretty
     @logger.info("Total size of uploaded files is #{total_file_size_pretty}")

-    @logger.info('Sending metadata to
-    remote =
+    @logger.info('Sending metadata to HDFS')
+    remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
     @logger.debug("metadata => #{remote}")
     @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)

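The upload loop now wraps each `@hadoop.create` call in a `begin`/`rescue` with a `retries` counter, a one-second sleep and a conditional `retry`. For comparison, a common bounded-retry idiom in Ruby looks like the sketch below; it is illustrative only (the `with_retries` helper and the commented call are not part of the gem), retries while attempts remain, and re-raises once they are exhausted:

```ruby
# Generic bounded-retry sketch, not the gem's implementation.
def with_retries(attempts: 3, delay: 1)
  tries = 0
  begin
    yield
  rescue StandardError
    tries += 1
    raise if tries >= attempts  # give up and propagate after the last attempt
    sleep delay                 # brief pause before the next try
    retry
  end
end

# Hypothetical usage around an HDFS write:
# with_retries { hadoop.create(remote, payload, overwrite: true) }
```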
@@ -146,27 +175,27 @@ class BackupTool
     snapshots = search_snapshots(node: node, date: date)
     if snapshots.empty?
       raise('No snapshot found for deletion')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        @hadoop.delete(remote)
-      end
-      @logger.info('Deleting metadata in Hadoop')
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    end
+
+    snapshots.each do |snapshot|
+      @logger.info("Deleting snapshot #{snapshot}")
+      node_snapshots = search_snapshots(node: snapshot.node)
+      merged_metadata = Set.new
+      node_snapshots.each do |s|
+        merged_metadata += s.metadata if s != snapshot
+      end
+      files = snapshot.metadata - merged_metadata
+      @logger.info("#{files.length} files to delete")
+      files.each do |file|
+        @logger.info("Deleting file #{file}")
+        remote = data_dir_for_node(snapshot.node) + '/' + file
         @logger.debug("DELETE => #{remote}")
         @hadoop.delete(remote)
       end
+      @logger.info('Deleting metadata in HDFS')
+      remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
+      @logger.debug("DELETE => #{remote}")
+      @hadoop.delete(remote)
     end
   end

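The rewritten deletion loop removes a data file only when no other snapshot of the same node still references it: the metadata of every other snapshot is merged into one set and subtracted from the snapshot being deleted. A toy illustration with invented file names:

```ruby
require 'set'

snapshot_to_delete = Set.new(['f1', 'f2', 'f3'])
other_snapshots    = [Set.new(['f1']), Set.new(['f2', 'f4'])]

# Same merge-then-subtract logic as the loop in the diff above.
merged_metadata = Set.new
other_snapshots.each { |s| merged_metadata += s }

files_to_delete = snapshot_to_delete - merged_metadata
p files_to_delete.to_a  # => ["f3"]  (f1 and f2 are still referenced elsewhere)
```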
@@ -177,7 +206,7 @@ class BackupTool
     @logger.info("Cleaning backup data on all nodes before #{retention_date}.")

     all_snapshots = search_snapshots
-    @logger.info("A total of #{all_snapshots.size} snapshots were found on
+    @logger.info("A total of #{all_snapshots.size} snapshots were found on HDFS.")

     snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
     @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
@@ -187,14 +216,13 @@ class BackupTool
     end

     all_backup_flags = get_backup_flags
-    @logger.info("A total of #{all_backup_flags.size} back up flags were found on
+    @logger.info("A total of #{all_backup_flags.size} back up flags were found on HDFS.")

     backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
     @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")

-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
     backup_flags_to_be_delete.each do |flag|
-      file =
+      file = metadata_dir() + flag.file
       @logger.info("Deleting #{file}")
       @hadoop.delete(file)
     end
@@ -204,19 +232,16 @@ class BackupTool
   # This is an individual command that has to be called manually after snapshots have finished
   def create_backup_flag(date)
     file_name = 'BACKUP_COMPLETED_' + date
-    remote_file =
+    remote_file = metadata_dir() + file_name

     @logger.info('Setting backup completed flag : ' + remote_file)
     @hadoop.create(remote_file, '', overwrite: true)
   end

   def get_backup_flags
-
-
-
-    backup_flags.collect do |file|
-      BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
-    end
+    @hadoop.list(metadata_dir())
+           .select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+           .collect { |file| BackupFlag.new(@cassandra.cluster_name, file['pathSuffix']) }
   end

   # Download a file from HDFS, buffered way
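`get_backup_flags` (like the snapshot and node discovery earlier in the file) now filters the entries returned by `@hadoop.list`, which are hashes carrying at least a `pathSuffix` and a `type` key, the same shape as WebHDFS `LISTSTATUS` entries. A sketch with a stubbed listing (values invented):

```ruby
# Stubbed directory listing shaped like the entries the code filters on.
listing = [
  { 'pathSuffix' => 'BACKUP_COMPLETED_2017_01_20', 'type' => 'FILE' },
  { 'pathSuffix' => 'node1',                       'type' => 'DIRECTORY' },
  { 'pathSuffix' => 'BACKUP_COMPLETED_2017_01_23', 'type' => 'FILE' },
]

flags = listing.select  { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
               .collect { |item| item['pathSuffix'] }
p flags  # => ["BACKUP_COMPLETED_2017_01_20", "BACKUP_COMPLETED_2017_01_23"]
```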
@@ -279,7 +304,7 @@ class BackupTool
     files_to_be_restored.each do |file|
       @logger.info("Restoring file #{file}")
       local = destination + '/' + file
-      remote =
+      remote = data_dir_for_node(snapshot.node) + file
       # Download the file from hdfs
       buffered_download(remote, local)
     end
data/lib/cassandra.rb
CHANGED
@@ -89,7 +89,7 @@ class Cassandra
   def get_keyspaces_and_tables
     result = {}
     Dir.foreach(@data_path) do |keyspace|
-      next if keyspace == '.' || keyspace == '..'
+      next if keyspace == '.' || keyspace == '..' || !Dir.exist?(@data_path + '/' + keyspace)
       result[keyspace] = []
       Dir.foreach(@data_path + '/' + keyspace) do |table|
         next if table == '.' || table == '..'
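The one-line change in `get_keyspaces_and_tables` skips anything in the data path that is not a directory: `Dir.foreach` yields every entry, and without the `Dir.exist?` guard a stray file would make the nested `Dir.foreach(@data_path + '/' + keyspace)` raise `Errno::ENOTDIR`. A minimal sketch of the guard with temporary paths (illustrative only):

```ruby
require 'tmpdir'
require 'fileutils'

Dir.mktmpdir do |data_path|
  FileUtils.mkdir_p(File.join(data_path, 'my_keyspace', 'my_table'))
  FileUtils.touch(File.join(data_path, 'stray_file.txt'))  # not a keyspace directory

  Dir.foreach(data_path) do |keyspace|
    next if keyspace == '.' || keyspace == '..' || !Dir.exist?(data_path + '/' + keyspace)
    puts keyspace  # prints only "my_keyspace"; the stray file is skipped
  end
end
```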
data/lib/cassback/version.rb
CHANGED
data/test/hadoop_stub.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cassback
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-01-
+date: 2017-01-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler