cassback 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e7a97a9c8109931fcc60bc344d4bbd21e77b52d
4
- data.tar.gz: ae8b20c65edb06200c39cff9d6807c30bce641b9
3
+ metadata.gz: 76766f06f7d636939af96dc78a6af157c652a303
4
+ data.tar.gz: 4c850cec7d6ca903b5750df42c84575fc7e01137
5
5
  SHA512:
6
- metadata.gz: c1f13b8a68a8f8c1ab87f6128824acf796c41ae2ed3c1adc6844c264cd5a3a7c74bfd9aad7f301e529dd080b6328dc09f642c18db57df15ad5c22f51d7f440a8
7
- data.tar.gz: 3e125579865dd92ee08f1bff96aaf8991ccc3066e51d5ca3ffc121531af1b0f2b90c9b3bfa812d303fcef1e77154e7e8922debc4b08d984185996ef206f1f511
6
+ metadata.gz: 093b50c2986c8baf54169530db3d7f5c76da98428cc95e16fcceb03467783b0ac2891112ea4b8a5182460c1c4614b1d3fc22090ceed8d010f728177868714384
7
+ data.tar.gz: 009f9557521432e5d580163e5b6ca82073f1fbecb70652b0166f46104ac2867f7f393475d35910e49ca83b0b0058e6e7ad8ac32ed2d1c05da176abc64734276f
data/bin/cassback CHANGED
@@ -63,6 +63,8 @@ options = {
63
63
  },
64
64
  'restore' => {
65
65
  'destination' => 'cassandra',
66
+ 'keyspace' => 'ALL',
67
+ 'table' => 'ALL',
66
68
  },
67
69
 
68
70
  'cleanup' => {
@@ -113,9 +115,15 @@ parser = OptionParser.new do |opts|
113
115
  opts.on('-d', '--date DATE', 'snapshot date, like YYYY_MM_DD') do |v|
114
116
  options['date'] = v
115
117
  end
116
- opts.on('-t', '--destination DIR', 'local destination path for restore (default is cassandra)') do |v|
118
+ opts.on('-o', '--destination DIR', 'local destination/output path for restore (default is cassandra)') do |v|
117
119
  command_line_config['restore']['destination'] = v
118
120
  end
121
+ opts.on('-k', '--keyspace KEYSPACE', 'keyspace that will be restored (by default all are restored)') do |v|
122
+ command_line_config['restore']['keyspace'] = v
123
+ end
124
+ opts.on('-t', '--table TABLE', 'table that will be restored (by default all are restored)') do |v|
125
+ command_line_config['restore']['table'] = v
126
+ end
119
127
 
120
128
  opts.separator ''
121
129
  opts.separator 'Hadoop (WebHDFS):'
@@ -194,7 +202,8 @@ begin
194
202
  # Restore a snapshot
195
203
  elsif action == 'restore'
196
204
  raise('No date given') unless options.include? 'date'
197
- bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'])
205
+ bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'],
206
+ keyspace: options['restore']['keyspace'], table: options['restore']['table'])
198
207
 
199
208
  # List snapshots
200
209
  elsif action == 'list'
data/lib/backuptool.rb CHANGED
@@ -243,7 +243,7 @@ class BackupTool
243
243
  # - +node+ -> node where the snapshot comes from
244
244
  # - +date+ -> snapshot date
245
245
  # - +destination+ -> local directory where to restore
246
- def restore_snapshot(node, date, destination)
246
+ def restore_snapshot(node, date, destination, keyspace: 'ALL', table: 'ALL')
247
247
  # Search the snapshot matching node and date
248
248
  snapshots = search_snapshots(node: node, date: date)
249
249
 
@@ -254,10 +254,20 @@ class BackupTool
254
254
  else
255
255
  snapshot = snapshots[0]
256
256
  @logger.info("Restoring snapshot #{snapshot}")
257
- @logger.info("#{snapshot.metadata.length} files to restore")
257
+ @logger.info("Snapshot has #{snapshot.metadata.length} files")
258
258
 
259
- # For each file in metadata
260
- snapshot.metadata.each do |file|
259
+ files_to_be_restored = snapshot.metadata.select { |item|
260
+ filename = File.basename(item)
261
+ matches_keyspace = keyspace == 'ALL' || (filename.include? keyspace)
262
+ matches_table = table == 'ALL' || (filename.include? table)
263
+ matches_keyspace && matches_table
264
+ }
265
+
266
+ @logger.info("Found #{files_to_be_restored.length} to be restored that match
267
+ keyspace #{keyspace} and table #{table}")
268
+
269
+ # For each file in the list
270
+ files_to_be_restored.each do |file|
261
271
  @logger.info("Restoring file #{file}")
262
272
  local = destination + '/' + file
263
273
  remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
@@ -1,3 +1,3 @@
1
1
  module Cassback
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.2.3'.freeze
3
3
  end
@@ -12,14 +12,17 @@ class CassandraStub
12
12
  @data_path = 'test/cassandra' + '/' + cluster_name + '/' + node_name + '/'
13
13
  FileUtils.mkdir_p(@data_path)
14
14
 
15
- # create some fake sstables
16
15
  @metadata = Set.new
16
+ end
17
+ end
18
+
19
+ public
20
+ def add_fake_files(file_indexes, keyspace, table)
17
21
  file_indexes.each do |index|
18
- file_name = "SSTable-#{index}-Data.db"
22
+ file_name = "#{keyspace}-#{table}#{index}-Data.db"
19
23
  file_path = @data_path + '/' + file_name
20
24
  File.open(file_path, 'w') { |file| file.write('This is a test file that simulates an SSTable') }
21
25
  @metadata.add(file_name)
22
- end
23
26
  end
24
27
 
25
28
  def new_snapshot
@@ -6,9 +6,12 @@ require_relative '../lib/backuptool'
6
6
  require_relative 'hadoop_stub'
7
7
  require_relative 'cassandra_stub'
8
8
 
9
- class TestSimpleNumber < Test::Unit::TestCase
9
+ class TestBackupTool < Test::Unit::TestCase
10
10
  def test_new_snapshot
11
11
  hadoop = HadoopStub.new('test/hadoop')
12
+
13
+ clean_test_data(hadoop)
14
+
12
15
  create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
13
16
 
14
17
  remote_files = hadoop.list_files('test/hadoop')
@@ -26,12 +29,14 @@ class TestSimpleNumber < Test::Unit::TestCase
26
29
  assert(metadata_content.include?('SSTable-2-Data.db'))
27
30
 
28
31
  # cleanup
29
- hadoop.delete('test/hadoop')
30
- hadoop.delete('test/cassandra')
32
+ clean_test_data(hadoop)
31
33
  end
32
34
 
33
35
  def test_two_snapshots
34
36
  hadoop = HadoopStub.new('test/hadoop')
37
+
38
+ clean_test_data(hadoop)
39
+
35
40
  create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
36
41
  create_new_snapshot(hadoop, 'node1', '2016_04_23', [2, 3, 4])
37
42
 
@@ -60,12 +65,14 @@ class TestSimpleNumber < Test::Unit::TestCase
60
65
  assert(metadata_content.include?('SSTable-4-Data.db'))
61
66
 
62
67
  # cleanup
63
- hadoop.delete('test/hadoop')
64
- hadoop.delete('test/cassandra')
68
+ clean_test_data(hadoop)
65
69
  end
66
70
 
67
71
  def test_restore
68
72
  hadoop = HadoopStub.new('test/hadoop')
73
+
74
+ clean_test_data(hadoop)
75
+
69
76
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
70
77
 
71
78
  # restore a newly created snapshot
@@ -78,13 +85,36 @@ class TestSimpleNumber < Test::Unit::TestCase
78
85
  assert_equal('test/restore/SSTable-2-Data.db', restored_files[1])
79
86
 
80
87
  # cleanup
81
- hadoop.delete('test/hadoop')
82
- hadoop.delete('test/restore')
83
- hadoop.delete('test/cassandra')
88
+ clean_test_data(hadoop)
89
+ end
90
+
91
+ def test_restore_with_filtering
92
+ hadoop = HadoopStub.new('test/hadoop')
93
+ clean_test_data(hadoop)
94
+
95
+ keyspace = 'profile'
96
+ table = 'hash_status'
97
+
98
+ backup_tool = create_new_snapshot_2(hadoop, 'node1', '2016_04_22', [1, 2], keyspace, table)
99
+
100
+ # restore a newly created snapshot
101
+ backup_tool.restore_snapshot('node1', '2016_04_22', 'test/restore', keyspace: keyspace, table: table)
102
+
103
+ restored_files = hadoop.list_files('test/restore')
104
+ # two files were restored
105
+ assert_equal(2, restored_files.size)
106
+ assert_equal('test/restore/profile-hash_status1-Data.db', restored_files[0])
107
+ assert_equal('test/restore/profile-hash_status2-Data.db', restored_files[1])
108
+
109
+ # cleanup
110
+ clean_test_data(hadoop)
84
111
  end
85
112
 
86
113
  def test_delete
87
114
  hadoop = HadoopStub.new('test/hadoop')
115
+
116
+ clean_test_data(hadoop)
117
+
88
118
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
89
119
 
90
120
  # delete a newly created snapshot
@@ -93,11 +123,14 @@ class TestSimpleNumber < Test::Unit::TestCase
93
123
  remote_files = hadoop.list_files('test/hadoop')
94
124
  assert_equal(0, remote_files.size)
95
125
 
96
- hadoop.delete('test/cassandra')
126
+ clean_test_data(hadoop)
97
127
  end
98
128
 
99
129
  def test_backup_flag
100
130
  hadoop = HadoopStub.new('test/hadoop')
131
+
132
+ clean_test_data(hadoop)
133
+
101
134
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
102
135
 
103
136
  backup_tool.create_backup_flag('2016_04_22')
@@ -108,12 +141,14 @@ class TestSimpleNumber < Test::Unit::TestCase
108
141
  assert_equal('test/hadoop/cass_snap_metadata/cluster1/BACKUP_COMPLETED_2016_04_22', remote_files[0])
109
142
 
110
143
  # cleanup
111
- hadoop.delete('test/hadoop')
112
- hadoop.delete('test/cassandra')
144
+ clean_test_data(hadoop)
113
145
  end
114
146
 
115
147
  def test_get_backup_flag
116
148
  hadoop = HadoopStub.new('test/hadoop')
149
+
150
+ clean_test_data(hadoop)
151
+
117
152
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
118
153
 
119
154
  backup_tool.create_backup_flag('2016_04_22')
@@ -126,12 +161,14 @@ class TestSimpleNumber < Test::Unit::TestCase
126
161
  assert_equal('BACKUP_COMPLETED_2016_04_22', flags[0].file)
127
162
 
128
163
  # cleanup
129
- hadoop.delete('test/hadoop')
130
- hadoop.delete('test/cassandra')
164
+ clean_test_data(hadoop)
131
165
  end
132
166
 
133
167
  def test_cleanup
134
168
  hadoop = HadoopStub.new('test/hadoop')
169
+
170
+ clean_test_data(hadoop)
171
+
135
172
  retention_days = 30
136
173
 
137
174
  date_31_days_back = (Date.today - 31).strftime('%Y_%m_%d')
@@ -164,17 +201,43 @@ class TestSimpleNumber < Test::Unit::TestCase
164
201
  assert_equal("BACKUP_COMPLETED_#{date_30_days_back}", backup_flags[0].file)
165
202
 
166
203
  # cleanup
167
- hadoop.delete('test/hadoop')
168
- hadoop.delete('test/cassandra')
204
+ clean_test_data(hadoop)
169
205
  end
170
206
 
171
207
  def create_new_snapshot(hadoop, node, date, file_indexes)
172
208
  logger = Logger.new(STDOUT)
173
209
  cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
210
+
211
+ # Add some fake files
212
+ cassandra.add_fake_files(file_indexes, 'SSTable', '')
213
+
174
214
  backup_tool = BackupTool.new(cassandra, hadoop, logger)
215
+ backup_tool.new_snapshot
216
+
217
+ backup_tool
218
+ end
219
+
220
+ def create_new_snapshot_2(hadoop, node, date, file_indexes, keyspace, table)
221
+ logger = Logger.new(STDOUT)
222
+
223
+ cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
224
+
225
+ # Add some files with keyspace and table
226
+ cassandra.add_fake_files(file_indexes, keyspace, table)
227
+ # Add some files not matching keyspace and table
228
+ cassandra.add_fake_files(file_indexes, 'test', 'test')
229
+
175
230
 
231
+ backup_tool = BackupTool.new(cassandra, hadoop, logger)
176
232
  backup_tool.new_snapshot
177
233
 
178
234
  backup_tool
179
235
  end
236
+
237
+ def clean_test_data(hadoop)
238
+ # cleanup
239
+ hadoop.delete('test/hadoop')
240
+ hadoop.delete('test/restore')
241
+ hadoop.delete('test/cassandra')
242
+ end
180
243
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cassback
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-09-21 00:00:00.000000000 Z
12
+ date: 2016-10-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  version: '0'
169
169
  requirements: []
170
170
  rubyforge_project:
171
- rubygems_version: 2.5.1
171
+ rubygems_version: 2.4.8
172
172
  signing_key:
173
173
  specification_version: 4
174
174
  summary: Cassandra backup to HDFS.