cassback 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e7a97a9c8109931fcc60bc344d4bbd21e77b52d
4
- data.tar.gz: ae8b20c65edb06200c39cff9d6807c30bce641b9
3
+ metadata.gz: 76766f06f7d636939af96dc78a6af157c652a303
4
+ data.tar.gz: 4c850cec7d6ca903b5750df42c84575fc7e01137
5
5
  SHA512:
6
- metadata.gz: c1f13b8a68a8f8c1ab87f6128824acf796c41ae2ed3c1adc6844c264cd5a3a7c74bfd9aad7f301e529dd080b6328dc09f642c18db57df15ad5c22f51d7f440a8
7
- data.tar.gz: 3e125579865dd92ee08f1bff96aaf8991ccc3066e51d5ca3ffc121531af1b0f2b90c9b3bfa812d303fcef1e77154e7e8922debc4b08d984185996ef206f1f511
6
+ metadata.gz: 093b50c2986c8baf54169530db3d7f5c76da98428cc95e16fcceb03467783b0ac2891112ea4b8a5182460c1c4614b1d3fc22090ceed8d010f728177868714384
7
+ data.tar.gz: 009f9557521432e5d580163e5b6ca82073f1fbecb70652b0166f46104ac2867f7f393475d35910e49ca83b0b0058e6e7ad8ac32ed2d1c05da176abc64734276f
data/bin/cassback CHANGED
@@ -63,6 +63,8 @@ options = {
63
63
  },
64
64
  'restore' => {
65
65
  'destination' => 'cassandra',
66
+ 'keyspace' => 'ALL',
67
+ 'table' => 'ALL',
66
68
  },
67
69
 
68
70
  'cleanup' => {
@@ -113,9 +115,15 @@ parser = OptionParser.new do |opts|
113
115
  opts.on('-d', '--date DATE', 'snapshot date, like YYYY_MM_DD') do |v|
114
116
  options['date'] = v
115
117
  end
116
- opts.on('-t', '--destination DIR', 'local destination path for restore (default is cassandra)') do |v|
118
+ opts.on('-o', '--destination DIR', 'local destination/output path for restore (default is cassandra)') do |v|
117
119
  command_line_config['restore']['destination'] = v
118
120
  end
121
+ opts.on('-k', '--keyspace KEYSPACE', 'keyspace that will be restored (by default all are restored)') do |v|
122
+ command_line_config['restore']['keyspace'] = v
123
+ end
124
+ opts.on('-t', '--table TABLE', 'table that will be restored (by default all are restored)') do |v|
125
+ command_line_config['restore']['table'] = v
126
+ end
119
127
 
120
128
  opts.separator ''
121
129
  opts.separator 'Hadoop (WebHDFS):'
@@ -194,7 +202,8 @@ begin
194
202
  # Restore a snapshot
195
203
  elsif action == 'restore'
196
204
  raise('No date given') unless options.include? 'date'
197
- bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'])
205
+ bck.restore_snapshot(options['node'], options['date'], options['restore']['destination'],
206
+ keyspace: options['restore']['keyspace'], table: options['restore']['table'])
198
207
 
199
208
  # List snapshots
200
209
  elsif action == 'list'
data/lib/backuptool.rb CHANGED
@@ -243,7 +243,7 @@ class BackupTool
243
243
  # - +node+ -> node where the snapshot comes from
244
244
  # - +date+ -> snapshot date
245
245
  # - +destination+ -> local directory where to restore
246
- def restore_snapshot(node, date, destination)
246
+ def restore_snapshot(node, date, destination, keyspace: 'ALL', table: 'ALL')
247
247
  # Search the snapshot matching node and date
248
248
  snapshots = search_snapshots(node: node, date: date)
249
249
 
@@ -254,10 +254,20 @@ class BackupTool
254
254
  else
255
255
  snapshot = snapshots[0]
256
256
  @logger.info("Restoring snapshot #{snapshot}")
257
- @logger.info("#{snapshot.metadata.length} files to restore")
257
+ @logger.info("Snapshot has #{snapshot.metadata.length} files")
258
258
 
259
- # For each file in metadata
260
- snapshot.metadata.each do |file|
259
+ files_to_be_restored = snapshot.metadata.select { |item|
260
+ filename = File.basename(item)
261
+ matches_keyspace = keyspace == 'ALL' || (filename.include? keyspace)
262
+ matches_table = table == 'ALL' || (filename.include? table)
263
+ matches_keyspace && matches_table
264
+ }
265
+
266
+ @logger.info("Found #{files_to_be_restored.length} files to be restored that match
267
+ keyspace #{keyspace} and table #{table}")
268
+
269
+ # For each file in the list
270
+ files_to_be_restored.each do |file|
261
271
  @logger.info("Restoring file #{file}")
262
272
  local = destination + '/' + file
263
273
  remote = @hadoop.base_dir + '/' + snapshot.cluster + '/' + snapshot.node + '/' + file
@@ -1,3 +1,3 @@
1
1
  module Cassback
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.2.3'.freeze
3
3
  end
@@ -12,14 +12,17 @@ class CassandraStub
12
12
  @data_path = 'test/cassandra' + '/' + cluster_name + '/' + node_name + '/'
13
13
  FileUtils.mkdir_p(@data_path)
14
14
 
15
- # create some fake sstables
16
15
  @metadata = Set.new
16
+ end
17
+ end
18
+
19
+ public
20
+ def add_fake_files(file_indexes, keyspace, table)
17
21
  file_indexes.each do |index|
18
- file_name = "SSTable-#{index}-Data.db"
22
+ file_name = "#{keyspace}-#{table}#{index}-Data.db"
19
23
  file_path = @data_path + '/' + file_name
20
24
  File.open(file_path, 'w') { |file| file.write('This is a test file that simulates an SSTable') }
21
25
  @metadata.add(file_name)
22
- end
23
26
  end
24
27
 
25
28
  def new_snapshot
@@ -6,9 +6,12 @@ require_relative '../lib/backuptool'
6
6
  require_relative 'hadoop_stub'
7
7
  require_relative 'cassandra_stub'
8
8
 
9
- class TestSimpleNumber < Test::Unit::TestCase
9
+ class TestBackupTool < Test::Unit::TestCase
10
10
  def test_new_snapshot
11
11
  hadoop = HadoopStub.new('test/hadoop')
12
+
13
+ clean_test_data(hadoop)
14
+
12
15
  create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
13
16
 
14
17
  remote_files = hadoop.list_files('test/hadoop')
@@ -26,12 +29,14 @@ class TestSimpleNumber < Test::Unit::TestCase
26
29
  assert(metadata_content.include?('SSTable-2-Data.db'))
27
30
 
28
31
  # cleanup
29
- hadoop.delete('test/hadoop')
30
- hadoop.delete('test/cassandra')
32
+ clean_test_data(hadoop)
31
33
  end
32
34
 
33
35
  def test_two_snapshots
34
36
  hadoop = HadoopStub.new('test/hadoop')
37
+
38
+ clean_test_data(hadoop)
39
+
35
40
  create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
36
41
  create_new_snapshot(hadoop, 'node1', '2016_04_23', [2, 3, 4])
37
42
 
@@ -60,12 +65,14 @@ class TestSimpleNumber < Test::Unit::TestCase
60
65
  assert(metadata_content.include?('SSTable-4-Data.db'))
61
66
 
62
67
  # cleanup
63
- hadoop.delete('test/hadoop')
64
- hadoop.delete('test/cassandra')
68
+ clean_test_data(hadoop)
65
69
  end
66
70
 
67
71
  def test_restore
68
72
  hadoop = HadoopStub.new('test/hadoop')
73
+
74
+ clean_test_data(hadoop)
75
+
69
76
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
70
77
 
71
78
  # restore a newly created snapshot
@@ -78,13 +85,36 @@ class TestSimpleNumber < Test::Unit::TestCase
78
85
  assert_equal('test/restore/SSTable-2-Data.db', restored_files[1])
79
86
 
80
87
  # cleanup
81
- hadoop.delete('test/hadoop')
82
- hadoop.delete('test/restore')
83
- hadoop.delete('test/cassandra')
88
+ clean_test_data(hadoop)
89
+ end
90
+
91
+ def test_restore_with_filtering
92
+ hadoop = HadoopStub.new('test/hadoop')
93
+ clean_test_data(hadoop)
94
+
95
+ keyspace = 'profile'
96
+ table = 'hash_status'
97
+
98
+ backup_tool = create_new_snapshot_2(hadoop, 'node1', '2016_04_22', [1, 2], keyspace, table)
99
+
100
+ # restore a newly created snapshot
101
+ backup_tool.restore_snapshot('node1', '2016_04_22', 'test/restore', keyspace: keyspace, table: table)
102
+
103
+ restored_files = hadoop.list_files('test/restore')
104
+ # two files were restored
105
+ assert_equal(2, restored_files.size)
106
+ assert_equal('test/restore/profile-hash_status1-Data.db', restored_files[0])
107
+ assert_equal('test/restore/profile-hash_status2-Data.db', restored_files[1])
108
+
109
+ # cleanup
110
+ clean_test_data(hadoop)
84
111
  end
85
112
 
86
113
  def test_delete
87
114
  hadoop = HadoopStub.new('test/hadoop')
115
+
116
+ clean_test_data(hadoop)
117
+
88
118
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
89
119
 
90
120
  # delete a newly created snapshot
@@ -93,11 +123,14 @@ class TestSimpleNumber < Test::Unit::TestCase
93
123
  remote_files = hadoop.list_files('test/hadoop')
94
124
  assert_equal(0, remote_files.size)
95
125
 
96
- hadoop.delete('test/cassandra')
126
+ clean_test_data(hadoop)
97
127
  end
98
128
 
99
129
  def test_backup_flag
100
130
  hadoop = HadoopStub.new('test/hadoop')
131
+
132
+ clean_test_data(hadoop)
133
+
101
134
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
102
135
 
103
136
  backup_tool.create_backup_flag('2016_04_22')
@@ -108,12 +141,14 @@ class TestSimpleNumber < Test::Unit::TestCase
108
141
  assert_equal('test/hadoop/cass_snap_metadata/cluster1/BACKUP_COMPLETED_2016_04_22', remote_files[0])
109
142
 
110
143
  # cleanup
111
- hadoop.delete('test/hadoop')
112
- hadoop.delete('test/cassandra')
144
+ clean_test_data(hadoop)
113
145
  end
114
146
 
115
147
  def test_get_backup_flag
116
148
  hadoop = HadoopStub.new('test/hadoop')
149
+
150
+ clean_test_data(hadoop)
151
+
117
152
  backup_tool = create_new_snapshot(hadoop, 'node1', '2016_04_22', [1, 2])
118
153
 
119
154
  backup_tool.create_backup_flag('2016_04_22')
@@ -126,12 +161,14 @@ class TestSimpleNumber < Test::Unit::TestCase
126
161
  assert_equal('BACKUP_COMPLETED_2016_04_22', flags[0].file)
127
162
 
128
163
  # cleanup
129
- hadoop.delete('test/hadoop')
130
- hadoop.delete('test/cassandra')
164
+ clean_test_data(hadoop)
131
165
  end
132
166
 
133
167
  def test_cleanup
134
168
  hadoop = HadoopStub.new('test/hadoop')
169
+
170
+ clean_test_data(hadoop)
171
+
135
172
  retention_days = 30
136
173
 
137
174
  date_31_days_back = (Date.today - 31).strftime('%Y_%m_%d')
@@ -164,17 +201,43 @@ class TestSimpleNumber < Test::Unit::TestCase
164
201
  assert_equal("BACKUP_COMPLETED_#{date_30_days_back}", backup_flags[0].file)
165
202
 
166
203
  # cleanup
167
- hadoop.delete('test/hadoop')
168
- hadoop.delete('test/cassandra')
204
+ clean_test_data(hadoop)
169
205
  end
170
206
 
171
207
  def create_new_snapshot(hadoop, node, date, file_indexes)
172
208
  logger = Logger.new(STDOUT)
173
209
  cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
210
+
211
+ # Add some fake files
212
+ cassandra.add_fake_files(file_indexes, 'SSTable', '')
213
+
174
214
  backup_tool = BackupTool.new(cassandra, hadoop, logger)
215
+ backup_tool.new_snapshot
216
+
217
+ backup_tool
218
+ end
219
+
220
+ def create_new_snapshot_2(hadoop, node, date, file_indexes, keyspace, table)
221
+ logger = Logger.new(STDOUT)
222
+
223
+ cassandra = CassandraStub.new('cluster1', node, date, file_indexes)
224
+
225
+ # Add some files with keyspace and table
226
+ cassandra.add_fake_files(file_indexes, keyspace, table)
227
+ # Add some files not matching keyspace and table
228
+ cassandra.add_fake_files(file_indexes, 'test', 'test')
229
+
175
230
 
231
+ backup_tool = BackupTool.new(cassandra, hadoop, logger)
176
232
  backup_tool.new_snapshot
177
233
 
178
234
  backup_tool
179
235
  end
236
+
237
+ def clean_test_data(hadoop)
238
+ # cleanup
239
+ hadoop.delete('test/hadoop')
240
+ hadoop.delete('test/restore')
241
+ hadoop.delete('test/cassandra')
242
+ end
180
243
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cassback
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vincent Van Hollebeke
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-09-21 00:00:00.000000000 Z
12
+ date: 2016-10-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  version: '0'
169
169
  requirements: []
170
170
  rubyforge_project:
171
- rubygems_version: 2.5.1
171
+ rubygems_version: 2.4.8
172
172
  signing_key:
173
173
  specification_version: 4
174
174
  summary: Cassandra backup to HDFS.