cassback 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f0fb33ef12a8632d9c1af04424c10dc15b9ec312
-  data.tar.gz: 96c27b901cfd9dca3e575ebff41fbbd2d09e9ad0
+  metadata.gz: 5922de1fa29428f24e257451d544e2e04e660d96
+  data.tar.gz: e40787d17308c083cf302b38c0153f169025aaff
 SHA512:
-  metadata.gz: c5f859dcc38bced28741136bd11da2a3cf9fa8432de8082d3f8de1dc3eef849a6fc9483f7ffe1371b5cd95d9689683293b2f3bcd20749738cca5d5936af22454
-  data.tar.gz: 570d68977b101c76e4749dbe098cb83776a95fa3716f7dec1c3505b67bd5c2ba2eb329a92268bd05a41a88c6eab856da37cf0212c9b46c67b3bc7574d7ad4678
+  metadata.gz: bbd25e253aa0ef1c31b12cc83199e4b3d45d60fa7debd68b91477221e58c8b30419250ebf28c4c52ec35207a554e05bbe53a305e7da1ee33d5194d83e1419c19
+  data.tar.gz: b76a37273cdae0d46a3c1d2f604fb9016513b0a01e28384702e9ad2d25497e8c88df27e41ab2c4f2aa0105288f91c9617b6eaa125d401c8cc50db3091da6eebc
data/lib/backuptool.rb CHANGED
@@ -22,59 +22,78 @@ class BackupTool
     @metadir = META_DIR
   end
 
+  def metadata_dir_for_backup(node, date)
+    return metadata_dir() + node + '/cass_snap_' + date + '/'
+  end
+
+  def metadata_dir_for_node(node)
+    return metadata_dir() + node + '/'
+  end
+
+  def metadata_dir()
+    return @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/'
+  end
+
+  def data_dir_for_node(node)
+    return @hadoop.base_dir + '/' + @cassandra.cluster_name + '/' + node + '/'
+  end
+
   # Look for snapshots
   # * *Args* :
   #   - +node+ -> Cassandra node name
   #   - +date+ -> HDFS instance
   def search_snapshots(node: 'ALL', date: 'ALL')
-    result = []
-
-    def get_snapshot_metadata(node, date)
-      remote = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + node + '/cass_snap_' + date
-      return @hadoop.read(remote).split("\n").to_set
-    rescue Exception => e
-      raise("Could not read metadata : #{e.message}")
-    end
 
+    # Look for all snapshots already existing for "node" at time "date"
     def get_snapshots_node(node, date)
-      result = []
+      results = []
+      dates = [date]
       begin
         if date == 'ALL'
-          ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
-          ls_metadata = ls.select { |item| item['pathSuffix'].include? 'cass_snap_' }
-          ls_metadata.each do |item|
-            date = item['pathSuffix'].gsub('cass_snap_', '')
-            metadata = get_snapshot_metadata(node, date)
-            snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-            result.push(snapshot)
-          end
-        else
-          metadata = get_snapshot_metadata(node, date)
-          snapshot = CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata)
-          result.push(snapshot)
+          dates = @hadoop.list(metadata_dir_for_node(node))
+                         .select { |dir| dir['pathSuffix'].include? 'cass_snap_' }
+                         .map { |dir| dir['pathSuffix'].gsub('cass_snap_', '') }
         end
+
+        dates.each do |date|
+          metadata = @hadoop.read(metadata_dir_for_backup(node, date)).split("\n").to_set
+          results.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
+        end
+
       rescue Exception => e
         @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
       end
-      result
+
+      return results
     end
 
-    if node == 'ALL'
+    # Get the list of nodes
+    def get_node_list(node)
+      if node != 'ALL'
+        return [node]
+      end
+
+      nodes = []
       begin
-        ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
-        ls_nodes = ls.select { |item| item['type'].casecmp('DIRECTORY') == 0 }
-        ls_nodes.each do |item|
-          n = item['pathSuffix']
-          result += get_snapshots_node(n, date)
-        end
+        nodes = @hadoop.list(metadata_dir())
+                       .select { |item| item['type'].casecmp('DIRECTORY') == 0 }
+                       .map { |item| item['pathSuffix'] }
+                       .flatten
       rescue Exception => e
-        @logger.warn("Could not get snapshots for cluster #{@cassandra.cluster_name} : #{e.message}")
+        @logger.warn("Could not get node list for cluster #{@cassandra.cluster_name} : #{e.message}")
      end
-    else
-      result = get_snapshots_node(node, date)
+
+      return nodes
     end
 
-    result.sort
+
+    # RUN
+    @logger.info("Searching snapshots for #{node} at time #{date}")
+    snapshots = get_node_list(node).map { |node| get_snapshots_node(node, date) }
+                                   .flatten
+                                   .sort
+    @logger.info("Found #{snapshots.length} snapshots")
+    return snapshots
   end
 
   def list_snapshots(node: @cassandra.node_name)
@@ -83,31 +102,40 @@ class BackupTool
     tp(snapshots, 'cluster', 'node', 'date')
   end
 
-  def new_snapshot
-    @logger.info('Starting a new snapshot')
-    snapshot = @cassandra.new_snapshot
-
+  def prepare_hdfs_dirs(node)
+    @logger.info(':::::::: Prepare HDFS ::::::::')
     begin
-      path = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/'
-      if not @hadoop.mkdir(path)
-        raise("Could not create your cluster directory : #{path}")
+      paths = [data_dir_for_node(node), metadata_dir_for_node(node)]
+      paths.each do |path|
+        @logger.info("Creating destination directory " + path)
+        if not @hadoop.mkdir(path)
+          raise
+        end
       end
     rescue Exception => e
       raise("Could not create your cluster directory : #{e.message}")
     end
+  end
 
+  def new_snapshot
+    @logger.info(':::::::: Creating new snapshot ::::::::')
+    snapshot = @cassandra.new_snapshot
+
+    prepare_hdfs_dirs(snapshot.node)
+
+    @logger.info(':::::::: Get last backup ::::::::')
     existing = search_snapshots(node: snapshot.node)
     last = if existing.empty?
-             CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
-           else
-             existing[-1]
-           end
-
-    @logger.info('Uploading tables to Hadoop')
+           then CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
+           else existing[-1] end
+    @logger.info("Last snapshot is #{last}")
     files = snapshot.metadata - last.metadata
     @logger.info("#{files.length} files to upload")
+
+
+    @logger.info('::::::: Uploading tables to HDFS ::::::')
     index = 0
-    number_of_files = files.size
+    number_of_files = files.length
     total_file_size = 0
     files.each do |file|
       index += 1
@@ -115,26 +143,27 @@ class BackupTool
       local_file_size = File.size(local)
       total_file_size += local_file_size
      pretty_size = Filesize.from("#{local_file_size} B").pretty
-      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to Hadoop")
-      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
+      @logger.info("Sending file #{index}/#{number_of_files} #{file} having size #{pretty_size} to HDFS")
+      remote = data_dir_for_node(snapshot.node) + file
       @logger.debug("#{local} => #{remote}")
-      f = File.open(local, 'r')
-      begin
-        retries = 3
-        @hadoop.create(remote, f, overwrite: true)
-      rescue
-        @logger.info("Hadoop write failed - retrying in 1s")
-        sleep 1
-        retry if (retries -= 1) < 0
+      File.open(local, 'r') do |f|
+        begin
+          retries = 3
+          @hadoop.create(remote, f, overwrite: true)
+        rescue Exception => e
+          @logger.info("HDFS write failed: #{e.message}")
+          @logger.info("HDFS write retrying in 1s")
+          sleep 1
+          retry if (retries -= 1) < 0
+        end
       end
-      f.close
     end
 
     total_file_size_pretty = Filesize.from("#{total_file_size} B").pretty
     @logger.info("Total size of uploaded files is #{total_file_size_pretty}")
 
-    @logger.info('Sending metadata to Hadoop')
-    remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    @logger.info('Sending metadata to HDFS')
+    remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
     @logger.debug("metadata => #{remote}")
     @hadoop.create(remote, snapshot.metadata.to_a * "\n", overwrite: true)
 
@@ -146,27 +175,27 @@ class BackupTool
     snapshots = search_snapshots(node: node, date: date)
     if snapshots.empty?
       raise('No snapshot found for deletion')
-    else
-      snapshots.each do |snapshot|
-        @logger.info("Deleting snapshot #{snapshot}")
-        node_snapshots = search_snapshots(node: snapshot.node)
-        merged_metadata = Set.new
-        node_snapshots.each do |s|
-          merged_metadata += s.metadata if s != snapshot
-        end
-        files = snapshot.metadata - merged_metadata
-        @logger.info("#{files.length} files to delete")
-        files.each do |file|
-          @logger.info("Deleting file #{file}")
-          remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
-          @logger.debug("DELETE => #{remote}")
-          @hadoop.delete(remote)
-        end
-        @logger.info('Deleting metadata in Hadoop')
-        remote = @hadoop.base_dir + '/' + @metadir + '/' + snapshot.cluster + '/' + snapshot.node + '/cass_snap_' + snapshot.date
+    end
+
+    snapshots.each do |snapshot|
+      @logger.info("Deleting snapshot #{snapshot}")
+      node_snapshots = search_snapshots(node: snapshot.node)
+      merged_metadata = Set.new
+      node_snapshots.each do |s|
+        merged_metadata += s.metadata if s != snapshot
+      end
+      files = snapshot.metadata - merged_metadata
+      @logger.info("#{files.length} files to delete")
+      files.each do |file|
+        @logger.info("Deleting file #{file}")
+        remote = data_dir_for_node(snapshot.node) + '/' + file
        @logger.debug("DELETE => #{remote}")
        @hadoop.delete(remote)
      end
+      @logger.info('Deleting metadata in HDFS')
+      remote = metadata_dir_for_backup(snapshot.node, snapshot.date)
+      @logger.debug("DELETE => #{remote}")
+      @hadoop.delete(remote)
     end
   end
 
@@ -177,7 +206,7 @@ class BackupTool
     @logger.info("Cleaning backup data on all nodes before #{retention_date}.")
 
     all_snapshots = search_snapshots
-    @logger.info("A total of #{all_snapshots.size} snapshots were found on Hadoop server.")
+    @logger.info("A total of #{all_snapshots.size} snapshots were found on HDFS.")
 
     snapshots_to_be_deleted = all_snapshots.select { |snapshot| snapshot.get_date < retention_date }
     @logger.info("A total of #{snapshots_to_be_deleted.size} snapshots will be deleted.")
@@ -187,14 +216,13 @@ class BackupTool
     end
 
     all_backup_flags = get_backup_flags
-    @logger.info("A total of #{all_backup_flags.size} back up flags were found on Hadoop server.")
+    @logger.info("A total of #{all_backup_flags.size} back up flags were found on HDFS.")
 
     backup_flags_to_be_delete = all_backup_flags.select { |flag| flag.date < retention_date }
     @logger.info("A total of #{backup_flags_to_be_delete.size} backup flags will be deleted.")
 
-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
     backup_flags_to_be_delete.each do |flag|
-      file = backup_flags_location + '/' + flag.file
+      file = metadata_dir() + flag.file
       @logger.info("Deleting #{file}")
       @hadoop.delete(file)
     end
@@ -204,19 +232,16 @@ class BackupTool
   # This is an individual command that has to be called manually after snapshots have finished
   def create_backup_flag(date)
     file_name = 'BACKUP_COMPLETED_' + date
-    remote_file = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name + '/' + file_name
+    remote_file = metadata_dir() + file_name
 
     @logger.info('Setting backup completed flag : ' + remote_file)
     @hadoop.create(remote_file, '', overwrite: true)
   end
 
   def get_backup_flags
-    backup_flags_location = @hadoop.base_dir + '/' + @metadir + '/' + @cassandra.cluster_name
-    ls = @hadoop.list(backup_flags_location)
-    backup_flags = ls.select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
-    backup_flags.collect do |file|
-      BackupFlag.new(@cassandra.cluster_name, file['pathSuffix'])
-    end
+    @hadoop.list(metadata_dir())
+           .select { |item| item['pathSuffix'].include? 'BACKUP_COMPLETED_' }
+           .collect { |file| BackupFlag.new(@cassandra.cluster_name, file['pathSuffix']) }
   end
 
   # Download a file from HDFS, buffered way
@@ -279,7 +304,7 @@ class BackupTool
     files_to_be_restored.each do |file|
       @logger.info("Restoring file #{file}")
       local = destination + '/' + file
-      remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
+      remote = data_dir_for_node(snapshot.node) + file
       # Download the file from hdfs
       buffered_download(remote, local)
     end
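
Most of this release is a refactor of backuptool.rb: HDFS path construction is centralized in four helpers (metadata_dir, metadata_dir_for_node, metadata_dir_for_backup, data_dir_for_node), search_snapshots is rebuilt on top of them, directory creation moves into prepare_hdfs_dirs, upload file handles switch to the block form of File.open so they are closed even on error, and log messages now say HDFS rather than Hadoop. A minimal sketch of how the helpers compose, with illustrative values standing in for the real @hadoop.base_dir, META_DIR and cluster name:

```ruby
# Illustrative values only - at runtime these come from the Hadoop and
# Cassandra objects passed to BackupTool.
base_dir = '/user/cassback'   # stands in for @hadoop.base_dir
metadir  = 'cass_meta'        # stands in for META_DIR
cluster  = 'cluster1'         # stands in for @cassandra.cluster_name

# metadata_dir: root of all snapshot metadata for the cluster
metadata_dir = base_dir + '/' + metadir + '/' + cluster + '/'

puts metadata_dir + 'node1/'                        # metadata_dir_for_node
# => /user/cassback/cass_meta/cluster1/node1/
puts metadata_dir + 'node1/cass_snap_2017_01_23/'   # metadata_dir_for_backup
# => /user/cassback/cass_meta/cluster1/node1/cass_snap_2017_01_23/
puts base_dir + '/' + cluster + '/node1/'           # data_dir_for_node
# => /user/cassback/cluster1/node1/
```

Keeping snapshot data and metadata under two parallel roots is what lets cleanup, delete_snapshots and restore above resolve every remote path through the same helpers instead of re-concatenating strings at each call site.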
data/lib/cassandra.rb CHANGED
@@ -89,7 +89,7 @@ class Cassandra
   def get_keyspaces_and_tables
     result = {}
     Dir.foreach(@data_path) do |keyspace|
-      next if keyspace == '.' || keyspace == '..'
+      next if keyspace == '.' || keyspace == '..' || !Dir.exist?(@data_path + '/' + keyspace)
       result[keyspace] = []
       Dir.foreach(@data_path + '/' + keyspace) do |table|
         next if table == '.' || table == '..'
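
The single change in cassandra.rb hardens keyspace discovery: entries under @data_path that are not directories are now skipped, so a stray file at the top level of the data directory no longer breaks the walk. A small self-contained demo of the guard (hypothetical layout, not taken from the gem's test suite):

```ruby
require 'tmpdir'

# Hypothetical data directory: one keyspace directory plus a stray file.
Dir.mktmpdir do |data_path|
  Dir.mkdir(File.join(data_path, 'system'))
  File.write(File.join(data_path, 'stray.lock'), '')

  Dir.foreach(data_path) do |keyspace|
    # Same guard as the new code: '.', '..' and non-directories are skipped.
    next if keyspace == '.' || keyspace == '..' || !Dir.exist?(data_path + '/' + keyspace)
    puts keyspace # prints only "system"; without the guard, a later
                  # Dir.foreach on "stray.lock" would raise Errno::ENOTDIR
  end
end
```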
data/lib/cassback/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Cassback
-  VERSION = '0.2.4'.freeze
+  VERSION = '0.2.5'.freeze
 end
data/test/hadoop_stub.rb CHANGED
@@ -26,6 +26,10 @@ class HadoopStub
     end
   end
 
+  def mkdir(path, _options = {})
+    return true
+  end
+
   def list_files(path, _options = {})
     files_and_folders = Dir.glob("#{path}/**/*")
     files_and_folders.select { |file| File.file?(file) }
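
The test stub gains a mkdir that always reports success, so the new prepare_hdfs_dirs call in new_snapshot can run against it without touching the filesystem. A hypothetical use (the stub's constructor arguments are assumed, not taken from the suite):

```ruby
# Assumed constructor; only the mkdir behaviour is shown by the diff.
hadoop = HadoopStub.new('test/hadoop')
hadoop.mkdir('test/hadoop/cluster1/node1/') # => true, nothing created on disk
```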
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cassback
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-01-19 00:00:00.000000000 Z
+date: 2017-01-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler