cassback 0.2.4 → 0.2.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f0fb33ef12a8632d9c1af04424c10dc15b9ec312
-  data.tar.gz: 96c27b901cfd9dca3e575ebff41fbbd2d09e9ad0
+  metadata.gz: 5922de1fa29428f24e257451d544e2e04e660d96
+  data.tar.gz: e40787d17308c083cf302b38c0153f169025aaff
 SHA512:
-  metadata.gz: c5f859dcc38bced28741136bd11da2a3cf9fa8432de8082d3f8de1dc3eef849a6fc9483f7ffe1371b5cd95d9689683293b2f3bcd20749738cca5d5936af22454
-  data.tar.gz: 570d68977b101c76e4749dbe098cb83776a95fa3716f7dec1c3505b67bd5c2ba2eb329a92268bd05a41a88c6eab856da37cf0212c9b46c67b3bc7574d7ad4678
+  metadata.gz: bbd25e253aa0ef1c31b12cc83199e4b3d45d60fa7debd68b91477221e58c8b30419250ebf28c4c52ec35207a554e05bbe53a305e7da1ee33d5194d83e1419c19
+  data.tar.gz: b76a37273cdae0d46a3c1d2f604fb9016513b0a01e28384702e9ad2d25497e8c88df27e41ab2c4f2aa0105288f91c9617b6eaa125d401c8cc50db3091da6eebc
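Note: a .gem file is a tar archive whose members include metadata.gz and data.tar.gz; the digests above cover those inner members, not the .gem itself. A minimal verification sketch using Ruby's stdlib, assuming a locally downloaded copy (the file name here is an example):

    require 'digest'
    require 'rubygems/package'

    # Hash the inner members of the gem archive so they can be compared
    # against the SHA1/SHA512 entries in checksums.yaml above.
    File.open('cassback-0.2.5.gem', 'rb') do |io|
      Gem::Package::TarReader.new(io) do |tar|
        tar.each do |entry|
          next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
          puts "#{entry.full_name}: #{Digest::SHA512.hexdigest(entry.read)}"
        end
      end
    end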
data/lib/backuptool.rb CHANGED
@@ -22,59 +22,78 @@ class BackupTool
     @metadir = META_DIR
   end
 
+  def metadata_dir_for_backup(node, date)
+    return metadata_dir() + node + '/cass_snap_' + date + '/'
+  end
+
+  def metadata_dir_for_node(node)
+    return metadata_dir() + node + '/'
+  end
+
+  def metadata_dir()
+    return @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/'
+  end
+
+  def data_dir_for_node(node)
+    return @hadoop.base_dir + '/' + @cassandra.cluster_name + '/' + node + '/'
+  end
+
   # Look for snapshots
   # * *Args* :
   #   - +node+ -> Cassandra node name
   #   - +date+ -> HDFS instance
   def search_snapshots(node: 'ALL', date: 'ALL')
-    result = []
-
-    def get_snapshot_metadata(node, date)
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
-      return @hadoop.read(remote).split("\n").to_set
-    rescue Exception => e
-      raise("Could not read metadata : #{e.message}")
-    end
 
+    # Look for all snapshots already existing for "node" at time "date"
     def get_snapshots_node(node, date)
-      result = []
+      results = []
+      dates = [date]
       begin
         if date == 'ALL'
-          ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
-          ls_metadata = ls.select { |item| item['pathSuffix'].include? 'cass_snap_' }
-          ls_metadata.each do |item|
-            date = item['pathSuffix'].gsub('cass_snap_', '')
-            metadata = get_snapshot_metadata(node, date)
-            snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-            result.push(snapshot)
-          end
-        else
-          metadata = get_snapshot_metadata(node, date)
-          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-          result.push(snapshot)
+          dates = @hadoop.list(metadata_dir_for_node(node))
+                         .select { |dir| dir['pathSuffix'].include? 'cass_snap_' }
+                         .map { |dir| dir['pathSuffix'].gsub('cass_snap_', '') }
         end
+
+        dates.each do |date|
+          metadata = @hadoop.read(metadata_dir_for_backup(node, date)).split("\n").to_set
+          results.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
+        end
+
       rescue Exception => e
         @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
       end
-      result
+
+      return results
     end
 
-    if node == 'ALL'
+    # Get the list of nodes
+    def get_node_list(node)
+      if node != 'ALL'
+        return [node]
+      end
+
+      nodes = []
       begin
-        ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
-        ls_nodes = ls.select { |item| item['type'].casecmp('DIRECTORY') == 0 }
-        ls_nodes.each do |item|
-          n = item['pathSuffix']
-          result += get_snapshots_node(n, date)
-        end
+        nodes = @hadoop.list(metadata_dir())
+                       .select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+                       .map { |item| item['pathSuffix'] }
+                       .flatten
       rescue Exception => e
-        @logger.warn("Could not get snapshots for cluster #{@cassandra.cluster_name} : #{e.message}")
+        @logger.warn("Could not get node list for cluster #{@cassandra.cluster_name} : #{e.message}")
       end
-    else
-      result = get_snapshots_node(node, date)
+
+      return nodes
     end
 
-    result.sort
+
+    # RUN
+    @logger.info("Searching snapshots for #{node} at time #{date}")
+    snapshots = get_node_list(node).map { |node| get_snapshots_node(node, date) }
+                                   .flatten
+                                   .sort
+    @logger.info("Found #{snapshots.length} snapshots")
+    return snapshots
   end
 
   def list_snapshots(node: @cassandra.node_name)
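The refactor above extracts all path construction into four helpers. One visible nuance: metadata_dir_for_backup appends a trailing '/', while the old inline concatenation of the metadata path ended at the date with no slash, so the paths sent to HDFS change slightly (WebHDFS normalizes this, but it is worth knowing when inspecting the filesystem). For orientation, a sketch of the layout the helpers produce; every concrete value below is invented:

    # Illustration only; base_dir, META_DIR, cluster, and node are made up.
    # With @hadoop.base_dir = '/backups', @metadir = 'meta',
    # cluster_name = 'prod', node = 'cass01', date = '2017_01_23':
    #
    #   metadata_dir()                                   #=> '/backups/meta/prod/'
    #   metadata_dir_for_node('cass01')                  #=> '/backups/meta/prod/cass01/'
    #   metadata_dir_for_backup('cass01', '2017_01_23')  #=> '/backups/meta/prod/cass01/cass_snap_2017_01_23/'
    #   data_dir_for_node('cass01')                      #=> '/backups/prod/cass01/'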
@@ -83,31 +102,40 @@ class BackupTool
     tp(snapshots, 'cluster', 'node', 'date')
   end
 
-  def new_snapshot
-    @logger.info('Starting a new snapshot')
-    snapshot = @cassandra.new_snapshot
-
+  def prepare_hdfs_dirs(node)
+    @logger.info(':::::::: Prepare HDFS ::::::::')
     begin
-      path = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/'
-      if not @hadoop.mkdir(path)
-        raise("Could not create your cluster directory : #{path}")
+      paths = [data_dir_for_node(node), metadata_dir_for_node(node)]
+      paths.each do |path|
+        @logger.info("Creating destination directory " + path)
+        if not @hadoop.mkdir(path)
+          raise
+        end
       end
     rescue Exception => e
       raise("Could not create your cluster directory : #{e.message}")
     end
+  end
 
+  def new_snapshot
+    @logger.info(':::::::: Creating new snapshot ::::::::')
+    snapshot = @cassandra.new_snapshot
+
+    prepare_hdfs_dirs(snapshot.node)
+
+    @logger.info(':::::::: Get last backup ::::::::')
     existing = search_snapshots(node: snapshot.node)
     last = if existing.empty?
-             CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
-           else
-             existing[-1]
-           end
-
-    @logger.info('Uploading tables to Hadoop')
+      then CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
+      else existing[-1] end
+    @logger.info("Last snapshot is #{last}")
     files = snapshot.metadata - last.metadata
     @logger.info("#{files.length} files to upload")
+
+
+    @logger.info('::::::: Uploading tables to HDFS ::::::')
     index = 0
-    number_of_files = files.size
+    number_of_files = files.length
     total_file_size = 0
     files.each do |file|
       index += 1
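A small regression worth noting in prepare_hdfs_dirs: the bare raise carries no message, so the enclosing rescue rewraps it as "Could not create your cluster directory : unhandled exception" and no longer names the path that failed, which the pre-refactor message did. A one-line hypothetical fix (not part of this release):

    raise("mkdir failed for #{path}") if not @hadoop.mkdir(path)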
@@ -115,26 +143,27 @@ class BackupTool
       local_file_size = File.size(local)
       total_file_size += local_file_size
       pretty_size = Filesize.from("#{local_file_size} B").pretty
-      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to Hadoop")
-      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
+      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to HDFS")
+      remote = data_dir_for_node(snapshot.node) + file
       @logger.debug("#{local} => #{remote}")
-      f = File.open(local, 'r')
-      begin
-        retries = 3
-        @hadoop.create(remote, f, overwrite: true)
-      rescue
-        @logger.info("Hadoop write failed - retrying in 1s")
-        sleep 1
-        retry if (retries -= 1) < 0
+      File.open(local, 'r') do |f|
+        begin
+          retries = 3
+          @hadoop.create(remote, f, overwrite: true)
+        rescue Exception => e
+          @logger.info("HDFS write failed: #{e.message}")
+          @logger.info("HDFS write retrying in 1s")
+          sleep 1
+          retry if (retries -= 1) < 0
+        end
       end
-      f.close
     end
 
     total_file_size_pretty = Filesize.from("#{total_file_size} B").pretty
     @logger.info("Total size of uploaded files is #{total_file_size_pretty}")
 
-    @logger.info('Sending metadata to Hadoop')
-    remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    @logger.info('Sending metadata to HDFS')
+    remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
     @logger.debug("metadata => #{remote}")
     @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)
 
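One caveat survives the cleanup of the retry block: retries = 3 is assigned inside the begin, so any retry would re-run the assignment and reset the counter, and the guard retry if (retries -= 1) < 0 fires only once the counter is negative. As written, the first failure decrements the counter to 2, skips the retry, and the rescue swallows the exception, so a failed upload is logged but never retried or re-raised. A conventional bounded-retry shape would be (hypothetical rewrite, not code from this release):

    retries = 3                     # initialize outside begin so retry cannot reset it
    begin
      @hadoop.create(remote, f, overwrite: true)
    rescue StandardError => e
      if (retries -= 1) >= 0        # retry while attempts remain
        @logger.info("HDFS write failed: #{e.message}; retrying in 1s")
        sleep 1
        retry
      else
        raise                       # out of retries: propagate the failure
      end
    end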
@@ -146,27 +175,27 @@ class BackupTool
     snapshots = search_snapshots(node: node, date: date)
     if snapshots.empty?
       raise('No snapshot found for deletion')
-    else
-      snapshots.each do |snapshot|
-        @logger.info("Deleting snapshot #{snapshot}")
-        node_snapshots = search_snapshots(node: snapshot.node)
-        merged_metadata = Set.new
-        node_snapshots.each do |s|
-          merged_metadata += s.metadata if s != snapshot
-        end
-        files = snapshot.metadata - merged_metadata
-        @logger.info("#{files.length} files to delete")
-        files.each do |file|
-          @logger.info("Deleting file #{file}")
-          remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
-          @logger.debug("DELETE => #{remote}")
-          @hadoop.delete(remote)
-        end
-        @logger.info('Deleting metadata in Hadoop')
-        remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    end
+
+    snapshots.each do |snapshot|
+      @logger.info("Deleting snapshot #{snapshot}")
+      node_snapshots = search_snapshots(node: snapshot.node)
+      merged_metadata = Set.new
+      node_snapshots.each do |s|
+        merged_metadata += s.metadata if s != snapshot
+      end
+      files = snapshot.metadata - merged_metadata
+      @logger.info("#{files.length} files to delete")
+      files.each do |file|
+        @logger.info("Deleting file #{file}")
+        remote = data_dir_for_node(snapshot.node) + '/' + file
         @logger.debug("DELETE => #{remote}")
         @hadoop.delete(remote)
       end
+      @logger.info('Deleting metadata in HDFS')
+      remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
+      @logger.debug("DELETE => #{remote}")
+      @hadoop.delete(remote)
     end
   end
 
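Minor inconsistency: data_dir_for_node already returns a trailing '/', so the delete path above ends up as .../node//file, while the upload path concatenates data_dir_for_node(snapshot.node) + file with a single slash. HDFS tolerates repeated slashes, but for consistency the delete line should likely read:

    remote = data_dir_for_node(snapshot.node) + file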
@@ -177,7 +206,7 @@ class BackupTool
     @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
 
     all_snapshots = search_snapshots
-    @logger.info("A total of #{all_snapshots.size} snapshots were found on Hadoop server.")
+    @logger.info("A total of #{all_snapshots.size} snapshots were found on HDFS.")
 
     snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
     @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
@@ -187,14 +216,13 @@ class BackupTool
     end
 
     all_backup_flags = get_backup_flags
-    @logger.info("A total of #{all_backup_flags.size} back up flags were found on Hadoop server.")
+    @logger.info("A total of #{all_backup_flags.size} back up flags were found on HDFS.")
 
     backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
     @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")
 
-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
     backup_flags_to_be_delete.each do |flag|
-      file = backup_flags_location + '/' + flag.file
+      file = metadata_dir() + flag.file
       @logger.info("Deleting #{file}")
       @hadoop.delete(file)
     end
@@ -204,19 +232,16 @@ class BackupTool
   # This is an individual command that has to be called manually after snapshots have finished
   def create_backup_flag(date)
     file_name = 'BACKUP_COMPLETED_' + date
-    remote_file = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + file_name
+    remote_file = metadata_dir() + file_name
 
     @logger.info('Setting backup completed flag : ' + remote_file)
     @hadoop.create(remote_file, '', overwrite: true)
   end
 
   def get_backup_flags
-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
-    ls = @hadoop.list(backup_flags_location)
-    backup_flags = ls.select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
-    backup_flags.collect do |file|
-      BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
-    end
+    @hadoop.list(metadata_dir())
+           .select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+           .collect { |file| BackupFlag.new(@cassandra.cluster_name, file['pathSuffix']) }
   end
 
   # Download a file from HDFS, buffered way
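For context, the flag mechanism in the two methods above works like this (the metadata_dir() value below is invented; it depends on META_DIR and the cluster name):

    # create_backup_flag('2017_01_23') writes an empty marker file:
    #   /backups/meta/prod/BACKUP_COMPLETED_2017_01_23
    #
    # get_backup_flags lists metadata_dir(), keeps entries whose name
    # contains 'BACKUP_COMPLETED_', and wraps each one as:
    #   BackupFlag.new('prod', 'BACKUP_COMPLETED_2017_01_23')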
@@ -279,7 +304,7 @@ class BackupTool
     files_to_be_restored.each do |file|
       @logger.info("Restoring file #{file}")
       local = destination + '/' + file
-      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
+      remote = data_dir_for_node(snapshot.node) + file
       # Download the file from hdfs
       buffered_download(remote, local)
     end
data/lib/cassandra.rb CHANGED
@@ -89,7 +89,7 @@ class Cassandra
   def get_keyspaces_and_tables
     result = {}
     Dir.foreach(@data_path) do |keyspace|
-      next if keyspace == '.' || keyspace == '..'
+      next if keyspace == '.' || keyspace == '..' || !Dir.exist?(@data_path + '/' + keyspace)
       result[keyspace] = []
       Dir.foreach(@data_path + '/' + keyspace) do |table|
         next if table == '.' || table == '..'
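The added Dir.exist? guard keeps stray files in the Cassandra data path from being treated as keyspaces; previously a plain file there made the inner Dir.foreach raise Errno::ENOTDIR. Illustration (hypothetical directory layout):

    # /var/lib/cassandra/data/
    # ├── system/       # directory  -> treated as a keyspace
    # ├── my_ks/        # directory  -> treated as a keyspace
    # └── .DS_Store     # plain file -> now skipped by the Dir.exist? check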
data/lib/cassback/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Cassback
-  VERSION = '0.2.4'.freeze
+  VERSION = '0.2.5'.freeze
 end
data/test/hadoop_stub.rb CHANGED
@@ -26,6 +26,10 @@ class HadoopStub
     end
   end
 
+  def mkdir(path, _options = {})
+    return true
+  end
+
   def list_files(path, _options = {})
     files_and_folders = Dir.glob("#{path}/**/*")
     files_and_folders.select { |file| File.file?(file) }
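The stub gains mkdir because new_snapshot now calls prepare_hdfs_dirs, which invokes @hadoop.mkdir and raises when it returns false. Reporting success unconditionally lets the existing tests run the snapshot flow without a real HDFS; nothing is created on disk, which is fine as long as no test asserts that the directory exists.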
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cassback
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-01-19 00:00:00.000000000 Z
+date: 2017-01-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler